| SHA1 | Message | Date |
|---|---|---|
| caf8b80936 | update RELEASE.md. | 4 years ago |
| 8770bfcdd7 | !1526 update commite id<br>From: @shenwei41 Reviewed-by: @liucunwei,@xsmq Signed-off-by: @liucunwei | 4 years ago |
| 329a144864 | Merge remote-tracking branch 'origin/r1.2' into code_sync_0415 | 4 years ago |
| da1a60bc02 | !1512 multi-thread online infer<br>From: @HW_KK Reviewed-by: @ji_chen,@wqtshg,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 76c0c3a371 | multi-thread online infer | 4 years ago |
| 2d446b8def | !1503 change model_name for dump<br>From: @jiming6 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| c144b4bb9e | !1507 Adaptation run package 0412<br>From: @shenwei41 Reviewed-by: @lilongfei15,@liucunwei Signed-off-by: @lilongfei15,@liucunwei | 4 years ago |
| 4928f86819 | update commit id | 4 years ago |
| b5a55e9ca9 | Merge remote-tracking branch 'origin/r1.2' into code_sync_0412 | 4 years ago |
| 48d7b6dc8b | fix | 4 years ago |
| daf8e56d25 | fix | 4 years ago |
| 46156bf04f | fix dump | 4 years ago |
| ee67c45a2b | !1487 Fix hccl control dependency<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 4b90851c68 | !1484 remove unused func InsertMemcpyNode<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 2e8d863a1e | !1482 ge static check<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 44415f12c8 | !1492 modify single op dump bug in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 36f2c837bf | Fix hccl control dependency | 4 years ago |
| f49599b6c5 | modify single op dump bug | 4 years ago |
| 99e607c6d1 | !1490 fix optional input bug<br>From: @wan_xuelei Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| d5f56ad31c | fix optional input bug | 4 years ago |
| c73a3c7b46 | fix sc check error | 4 years ago |
| f971f512e3 | static check modify | 4 years ago |
| 7f73eedb8a | !1478 Don't reset -2 when there is aicore op.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| ed941d6d87 | !1461 modify dump single op in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 089b82e9bd | !1469 modify dynamic shape dump in c77<br>From: @zhou_chao1993 Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| e52c916f56 | Don't reset -2 when there is aicore op. | 4 years ago |
| 4c8e5f73c6 | !1476 Bugfix: Missing hccl execution dependency due to wrong attribute type of _parallel_group<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| a4783ff468 | !1460 Reduce weight memory usage & Remove redundant memcpy<br>From: @xchu42 Reviewed-by: @wqtshg,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 19d1f804c7 | Bugfix: keep hccl control dependency | 4 years ago |
| c90cae1410 | modify dynamic shape dump | 4 years ago |
| 4c0d85693a | !1463 Save atomic kernel bin to model.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| b48ecfe347 | Save atomic kernel bin to model. | 4 years ago |
| d7b607dc83 | !1464 fix aipp check<br>From: @wangxiaotian22 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 637bcc86d6 | modify dump single op | 4 years ago |
| 30743e1e59 | fix aipp check | 4 years ago |
| 24b2437361 | Fix dump for known-shaped subgraph | 4 years ago |
| 6d92a616ea | !1455 Synchronize latest Ascend software suite 06 Apr 2021<br>From: @nicholas_yhr Reviewed-by: @majorzhang,@lilongfei15 Signed-off-by: @majorzhang | 4 years ago |
| 03e87b5570 | Merge remote-tracking branch 'upstream/r1.2' into code_sync_0406 | 4 years ago |
| 3ef3f54d94 | Save atomic kernel bin to model. | 4 years ago |
| 34f09f4fc8 | !1447 LinkToPotentialPrecedenceNode<br>From: @dimitri_rose Reviewed-by: @sheng-nan,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 73e7c53f8a | !1448 Fix bug of const input index.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 494fa061a8 | !1444 modify dump content in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| aeec1cb08b | !1446 modify set dump in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 960cc1fd64 | Fix bug of const input index. | 4 years ago |
| 5f1e659fcd | LinkToPotentialPrecedenceNode | 4 years ago |
| b1822cc73c | modify set dump in c77 | 4 years ago |
| 4931c4fa1e | modify dump content | 4 years ago |
| 24d3b54ab8 | !1443 synchronize latest ascend softare suite 02 Apr 2021<br>From: @nicholas_yhr Reviewed-by: @lilongfei15,@ljl0711 Signed-off-by: @lilongfei15 | 4 years ago |
| 2fbf01c53f | Merge remote-tracking branch 'origin/r1.2' into code_sync_0402 | 4 years ago |
| 9d6aaa117c | !1419 Add GetOriginalType for support RefSwitch & RefMerge<br>From: @chen_yemeng Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 0da36c04e4 | !1421 fixed sc warning<br>From: @li-lei0106 Reviewed-by: @wqtshg,@xchu42 Signed-off-by: @ji_chen | 4 years ago |
| 2ac43d4033 | !1430 fix 1951 ts 4g bug<br>From: @wan_xuelei Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 2112a36e80 | !1415 support unknown while subgraph<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 68595a656a | fix ts 4g memory bug | 4 years ago |
| 890373c79c | fixed reviewbot warning | 4 years ago |
| 7a40a575f7 | Add GetOriginalType for support RefSwitch & RefMerge | 4 years ago |
| 701b0d6c1b | support unknown while subgraph | 4 years ago |
| da71533e55 | !1345 fixed sc warning<br>From: @li-lei0106 Reviewed-by: Signed-off-by: | 4 years ago |
| af83c480c5 | !1388 Feature: Tiger online inference support<br>From: @hugo1 Reviewed-by: @xchu42,@ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| c936821629 | modified: metadef | 4 years ago |
| 971630a7d2 | !1400 Bugfix: While loop failed to restore original input after execution<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 1735e1b1f3 | !1402 l2 buffer for f1.3.0<br>From: @youui Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 12cef9e9b9 | support unknown while subgraph | 4 years ago |
| 0679af1d75 | !1409 update include files 0330<br>From: @shenwei41 Reviewed-by: @lilongfei15,@wenkai_dist,@ljl0711 Signed-off-by: @lilongfei15 | 4 years ago |
| 5ddf2ac2b2 | update include files to 1.2 | 4 years ago |
| 7516130c7e | delete code | 4 years ago |
| de47249a72 | !1407 update commit id to r1.2 0330<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| df1592e97a | Merge remote-tracking branch 'origin/r1.2' into code_sync_0330 | 4 years ago |
| 7ec6e4fe61 | r13_l2 | 4 years ago |
| 7ed03d0d0e | !1398 fix import<br>From: @youui Reviewed-by: @ljl0711,@liujunzhu Signed-off-by: @liujunzhu | 4 years ago |
| 1d0359d1c6 | fixed pclint warning | 4 years ago |
| e9868abe29 | fixed sc warning by wangxiaotian | 4 years ago |
| 4fe73f77bc | fixed sc warning | 4 years ago |
| 59a3e2e0ff | fix import | 4 years ago |
| 4a7f623b12 | while loop failed to restore input desc | 4 years ago |
| 8e0634323d | modified: ge/graph/passes/base_pass.h | 4 years ago |
| f19cd2fca9 | !1386 Adding dependencies by parallel groups<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| c691f2a7d7 | !1385 Fix error of single_op memory free.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| e2f04ddabd | !1375 bugfix for atomic_addr_clean_pass<br>From: @yangyongqiang5033 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 50552c3631 | modified: ge/graph/passes/base_pass.cc<br>modified: ge/graph/passes/base_pass.h<br>modified: ge/graph/passes/infershape_pass.cc<br>modified: ge/hybrid/executor/hybrid_model_async_executor.cc<br>modified: ge/hybrid/executor/subgraph_executor.cc<br>modified: ge/hybrid/node_executor/aicore/aicore_op_task.cc | 4 years ago |
| 167621141b | hccl ops with same parallel group can not be execute parallelly | 4 years ago |
| aad154cdf1 | Fix error of single_op memory free. | 4 years ago |
| aead0be2d6 | !1372 online_inference c77<br>From: @dimitri_rose Reviewed-by: @ji_chen,@sheng-nan Signed-off-by: @ji_chen | 4 years ago |
| 2cf49ced1c | online_inference c77 | 4 years ago |
| b8621d9d0e | !1370 bugfix for auto find fp<br>From: @ni100die Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 9d34427af9 | bugfix for atomic_addr_clean_pass | 4 years ago |
| 37c928ed29 | bugfix for auto find fp | 4 years ago |
| 0901ca5581 | !1337 Fix bug of single_op inferdepend.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@xchu42,@ji_chen Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 1224cdee8a | !1306 dump for unknownshape<br>From: @jiming6 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| 56007bea30 | !1351 sync runtime head<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@youui,@ji_chen Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 168508b063 | !1354 update include file 0325<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 23b471ca2b | update include file 0325 | 4 years ago |
| e2f929b761 | !1352 update graphengine_0325<br>From: @shenwei41 Reviewed-by: @majorzhang,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 518ac24516 | update metdef | 4 years ago |
| a6bcb04c7f | Merge remote-tracking branch 'origin/r1.2' into code_sync_0325 | 4 years ago |
| 5367bbe395 | !1316 fixed compiled issue for proto files<br>From: @li-lei0106 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| f0d897b0bb | fixed compiled issue for proto files | 4 years ago |
| 13ecbe405a | sync runtime head | 4 years ago |
| 3050d3984a | !1307 fix bug of dynamic shape load error<br>From: @wan_xuelei Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| 1ccd0dd9ee | !1341 modify profiing reporter data max len<br>From: @zhengyuanhua Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| 97d93adaa5 | modify profiling reporter data max len | 4 years ago |
| 4238e11e99 | !1339 refactor label manager<br>From: @zhoufeng54 Reviewed-by: @liujunzhu,@youui Signed-off-by: @youui | 4 years ago |
| ba2fcefa04 | refactor label manager<br>Signed-off-by: zhoufeng <zhoufeng54@huawei.com> | 4 years ago |
| 77d5468cf6 | Fix bug of single_op inferdepend. | 4 years ago |
| a89113e743 | fix bug of dynamic shape load error | 4 years ago |
| e3fbf4d860 | !1277 offline dynamic shape inference support<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 36eb9620d4 | !1304 label goto implentment modified to be same as label switch<br>From: @zhoufeng54 Reviewed-by: @liujunzhu,@majorzhang Signed-off-by: @majorzhang | 4 years ago |
| 179e10f36b | label switch<br>Signed-off-by: zhupuxu <zhupuxu@huawei.com> | 4 years ago |
| 745153a252 | !1302 update include headers 0318<br>From: @shenwei41 Reviewed-by: @xsmq,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| f87dd9d016 | update include headers | 4 years ago |
| d9d99c3cf5 | !1300 Update GE commit id<br>From: @shenwei41 Reviewed-by: @xsmq,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| dfb2f4b7af | update commit id format | 4 years ago |
| 4a18a6791d | Merge remote-tracking branch 'origin/r1.2' into code_sync_0318 | 4 years ago |
| e6d3c77e80 | offline dynamic shape inference support | 4 years ago |
| 81ac111f09 | !1259 Unique LabelGoto args addr<br>From: @zhangxiaokun9 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 6e874e8b87 | Unique LabelGoto args addr | 4 years ago |
| 4d6e7acc14 | !1252 update submodule<br>From: @wqtshg Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 67bdf03f4b | update submodule | 4 years ago |
| 7f94f51ef2 | !1249 fix bug that all subgraph is unknown and netoutput format is not nd bug<br>From: @wan_xuelei Reviewed-by: @xchu42,@xchu42,@wqtshg Signed-off-by: @wqtshg | 4 years ago |
| b7a534a90a | !1248 modify error log<br>From: @wangxiaotian22 Reviewed-by: Signed-off-by: | 4 years ago |
| 5e3d450ac8 | !1245 add optune support for dynamic shape<br>From: @ni100die Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 7019c576c5 | !1246 update onnx format<br>From: @yangyongqiang5033 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| b507314cd2 | !1244 modify user input shape check<br>From: @zhengyuanhua Reviewed-by: @xchu42,@wqtshg Signed-off-by: @wqtshg | 4 years ago |
| ee95f078b4 | for ut cov | 4 years ago |
| 08ecc484fd | fix bug that all subgraph is unknown and netoutput format is not nd bug | 4 years ago |
| eadebcb9ff | delete fail ut | 4 years ago |
| 64c4bbd8e7 | fix bug that all subgraph is unknown and netoutput format is not nd bug | 4 years ago |
| b9e1a62faf | fix ut | 4 years ago |
| e35eddf16a | fix ut | 4 years ago |
| 5d7eab5a4b | fix ut | 4 years ago |
| 723f398670 | fix bug that all subgraph is unknown and netoutput format is not nd bug | 4 years ago |
| 79ae2d3fe1 | fix | 4 years ago |
| 49aacf9e6f | add for optune dynamic shape support | 4 years ago |
| a44695f245 | fix | 4 years ago |
| e8dd99af4e | for ut cov | 4 years ago |
| 48e0a68b62 | add for optune dynamic shape support | 4 years ago |
| c211d1bd36 | modify error log | 4 years ago |
| ed6a811c15 | checkout input user dim modify | 4 years ago |
| f8479e16d5 | add for optune dynamic shape support | 4 years ago |
| bf14833ef4 | add for optune dynamic shape support | 4 years ago |
| 82b32210e2 | !1238 static check 0312<br>From: @lwx911747 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @wqtshg | 4 years ago |
| e73d9f89b8 | !1204 Bugfix: Tiger online inference change on executor<br>From: @hugo1 Reviewed-by: Signed-off-by: | 4 years ago |
| 5acba13261 | modified: concat_offset_kernel.cc<br>modified: gather_v2_kernel.cc<br>modified: strided_slice_kernel.cc | 4 years ago |
| d74016165b | !1237 remove check shape by shape range<br>From: @zhengyuanhua Reviewed-by: @xchu42,@wqtshg Signed-off-by: @wqtshg | 4 years ago |
| 56ff720fac | modified: ../../tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 4 years ago |
| a63df26ead | static check 0312 | 4 years ago |
| 801a1e0fca | modified: gather_v2_kernel.cc<br>modified: strided_slice_kernel.cc<br>modified: ../../tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 4 years ago |
| 992c791c7c | !1236 move set stage in finalize<br>From: @wangxiaotian22 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @wqtshg | 4 years ago |
| 33d609ebad | remove check shape by shape range | 4 years ago |
| 074e7d4f8c | update onnx format | 4 years ago |
| 9f027029d5 | !1224 add process for some op need infershape when running<br>From: @wan_xuelei Reviewed-by: @xchu42,@wqtshg Signed-off-by: @wqtshg | 4 years ago |
| 3df7893398 | move setstage in finalize | 4 years ago |
| c5b77a0124 | !1229 revert get log header<br>From: @wangxiaotian22 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @wqtshg | 4 years ago |
| bab9bca596 | modified: ge/graph/partition/dynamic_shape_partition.cc | 4 years ago |
| ce83b1569d | modified: tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 4 years ago |
| bcd0fc5111 | modify clang | 4 years ago |
| 2532144c77 | revert get logheader | 4 years ago |
| 1227e0339f | add force infershape for some op | 4 years ago |
| 305138f9f7 | !1225 Replace rtLabelGotoEx by rtLabelSwitchByIndex<br>From: @zhangxiaokun9 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 2ece5f3b63 | !1218 inference dynamic input<br>From: @zhengyuanhua Reviewed-by: @wan_xuelei,@wqtshg,@xchu42 Signed-off-by: @ljl0711 | 4 years ago |
| f7d8c04c57 | !1226 modify data dumper<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@youui Signed-off-by: @youui | 4 years ago |
| a028954204 | !1222 error log modify<br>From: @wangxiaotian22 Reviewed-by: Signed-off-by: | 4 years ago |
| 684093d759 | modify data dumper | 4 years ago |
| 390905b877 | modify | 4 years ago |
| 11c9476b41 | modify | 4 years ago |
| cd16a4215d | Fix util.h Check clang-format | 4 years ago |
| 395fddbcff | fix ut core | 4 years ago |
| 8d8786bfd2 | Replace rtLabelGotoEx by rtLabelSwitchByIndex | 4 years ago |
| 5ae267433b | add force infershape for some op | 4 years ago |
| 19a55bcdb4 | modify | 4 years ago |
| 919753675f | add ut depend | 4 years ago |
| 612463e089 | modified: tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 4 years ago |
| 342944505a | fix clang | 4 years ago |
| 365401b52f | add force infershape for some op | 4 years ago |
| 0f4cf5a291 | fix clang | 4 years ago |
| 0d09bdb890 | ERROR log modify | 4 years ago |
| 08206700f9 | inference dynamic input | 4 years ago |
| 3d0a83a455 | modified: tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 4 years ago |
| c94e0fbdc6 | add force infershape for some op | 4 years ago |
| 5fe85f3f85 | modified: ge/graph/partition/dynamic_shape_partition.cc | 4 years ago |
| 58a3e06c17 | modified: ge/graph/build/graph_builder.cc<br>modified: ge/graph/load/model_manager/model_manager.cc<br>modified: ge/host_kernels/identity_kernel.cc<br>modified: ge/hybrid/model/hybrid_model.h<br>modified: ge/hybrid/model/hybrid_model_builder.cc<br>modified: ge/hybrid/node_executor/task_context.cc | 4 years ago |
| 3401ca857c | dump for unknownshape | 4 years ago |
@@ -1,8 +1,8 @@
 [submodule "parser"]
     path = parser
     url = https://gitee.com/ascend/parser.git
-    branch = master
+    branch = r1.3.0
 [submodule "metadef"]
     path = metadef
     url = https://gitee.com/ascend/metadef.git
-    branch = master
+    branch = r1.3.0
@@ -1,3 +1,18 @@
+### Major Features and Improvements
+* Multiple parallel communication groups can be distinguished and the communication tasks of different parallel communication groups can be divided into different streams.
+* Parallel tasks are added for the entire map optimization. The execution sequence is optimized for parallel groups.
+* Dynamic shape is supported for single operator in inference scenarios.
+* Online inference supports concurrent execution of multiple threads.
+* Memory allocation supports address reuse in the buffer pool.
+* Supports Event resource reuse.
+* Supports the BF16 data type.
+
+## Thanks to our Contributors
+Thanks goes to these wonderful people: wuweikang,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong
+Contributions of any kind are welcome!
+
 # Release 1.0.0
 ## Major Features and Improvements
@@ -229,7 +229,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
     rm -rf ${BASEPATH}/cov
     mkdir ${BASEPATH}/cov
     lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-    lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
+    lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '/usr/include/*' '*/metadef/*' '*/parser/*' -o cov/coverage.info
    cd ${BASEPATH}/cov
    genhtml coverage.info
 fi
@@ -31,6 +31,7 @@ set(PROTO_HEADER_LIST
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
 protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST})
 protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})
+protobuf_generate(ge_client PROTO_CLIENT_HEADER_SRCS PROTO_CLIENT_HEADER_HDRS ${PROTO_HEADER_LIST})

 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_proto_common.a ############
@@ -56,7 +57,7 @@ target_link_libraries(ge_proto_common PRIVATE

 ############ libge_proto_client.a ############
 add_library(ge_proto_client STATIC
-    ${PROTO_HEADER_HDRS}
+    ${PROTO_CLIENT_HEADER_HDRS}
     ${PROTO_CLIENT_SRCS}
 )
@@ -65,6 +66,11 @@ target_compile_definitions(ge_proto_client PRIVATE
     google=ascend_private
 )
+target_include_directories(ge_proto_client PRIVATE
+    ${CMAKE_BINARY_DIR}/proto/ge_client
+    ${CMAKE_BINARY_DIR}/proto/ge_client/proto
+)
+
 target_compile_options(ge_proto_client PRIVATE
     -O2
     -fno-common
@@ -937,6 +943,10 @@ add_library(atc_stub_ge_compiler SHARED
 add_dependencies(atc_stub_ge_compiler ge_stub)

+target_compile_options(atc_stub_ge_compiler PRIVATE
+    -fno-common
+)
+
 target_link_libraries(atc_stub_ge_compiler PRIVATE
     $<BUILD_INTERFACE:intf_pub>
 )
@@ -221,7 +221,10 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_
   try {
     json_file_ << jsn.dump(kJsonDumpLevel) << std::endl;
   } catch (nlohmann::detail::type_error &e) {
-    GELOGE(FAILED, "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s], session_id:%lu, graph_id:%lu", json_file_name_.c_str(), e.what(), session_id, graph_id);
+    GELOGE(FAILED,
+           "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s],"
+           "session_id:%lu, graph_id:%lu",
+           json_file_name_.c_str(), e.what(), session_id, graph_id);
     ret_failed = true;
   }
   json_file_.close();
@@ -241,7 +244,9 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) {
   GE_CHECK_NOTNULL(graph_info);
   auto status = SaveOpInfo(desc, data_info, graph_info);
   if (status != SUCCESS) {
-    GELOGE(status, "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!", desc->GetName().c_str(), desc->GetType().c_str());
+    GELOGE(status,
+           "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!",
+           desc->GetName().c_str(), desc->GetType().c_str());
     return FAILED;
   }
   // create json file
@@ -171,17 +171,17 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
 // GE finalize, releasing all resources
 Status GEFinalize() {
-  ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize);
-  GELOGT(TRACE_INIT, "GEFinalize start");
-  ErrorManager::GetInstance().GenWorkStreamIdDefault();
-  std::lock_guard<std::mutex> lock(g_ge_release_mutex);
   // check init status
   if (!g_ge_initialized) {
-    GELOGW("GEFinalize is called before GEInitialize");
+    GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize");
     return SUCCESS;
   }
+  std::lock_guard<std::mutex> lock(g_ge_release_mutex);
+  ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize);
+  ErrorManager::GetInstance().GenWorkStreamIdDefault();
+  GELOGT(TRACE_INIT, "GEFinalize start");
   // call Finalize
   Status ret = SUCCESS;
   Status middle_ret;
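The reordering above moves the `g_ge_initialized` early-return ahead of the mutex acquisition and the error-stage bookkeeping, so finalizing a GE that was never initialized touches no shared state at all. A minimal standalone sketch of that check-then-lock pattern (the names `g_initialized`, `g_release_mutex`, and `Finalize` are illustrative stand-ins, not the GE API):

```cpp
#include <iostream>
#include <mutex>

namespace {
bool g_initialized = false;  // set by a successful Initialize()
std::mutex g_release_mutex;  // serializes concurrent Finalize() calls
}  // namespace

// Sketch of the reordered flow: bail out before taking the lock or doing
// any bookkeeping when there is nothing to release.
int Finalize() {
  if (!g_initialized) {
    std::cerr << "Finalize called before Initialize\n";
    return 0;  // treated as success: nothing to do
  }
  std::lock_guard<std::mutex> lock(g_release_mutex);
  // ... release resources under the lock ...
  g_initialized = false;
  return 0;
}

int main() {
  Finalize();  // warns and returns immediately; the mutex is never touched
}
```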
@@ -16,6 +16,7 @@ set(PROTO_LIST
 )

 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST})

 set(SRC_LIST
     "context/ctx.cc"
@@ -127,7 +128,7 @@ target_link_libraries(ge_common PRIVATE
 )

 ############ libge_common.a ############
-add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_HDRS})
+add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS})
 target_compile_definitions(ge_common_static PRIVATE
     PROTOBUF_INLINE_NOT_IN_HEADERS=0
     HOST_VISIBILITY
@@ -158,7 +159,7 @@ target_include_directories(ge_common_static PRIVATE
     ${METADEF_DIR}/inc/external/graph
     ${METADEF_DIR}/inc/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_static
     #### yellow zone ####
     ${GE_DEPEND_DIR}/inc
     ${GE_DEPEND_DIR}/inc/cce
@@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
   dump_mode = dump_config.dump_mode;
   GELOGI("Dump mode is %s", dump_mode.c_str());
   dump_properties.SetDumpMode(dump_mode);
-  dump_properties_map_.emplace(kInferSessionId, dump_properties);
+  dump_properties_map_[kInferSessionId] = dump_properties;
   return SUCCESS;
 }
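This one-line change matters because `std::map::emplace` is a no-op when the key already exists, so a second call to `SetDumpConf` would silently keep the stale properties; `operator[]` assigns unconditionally. A minimal demonstration of the difference, with a plain `std::map<int, std::string>` standing in for the dump-properties map:

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<int, std::string> m;

  m.emplace(0, "first");
  m.emplace(0, "second");     // key 0 already exists: emplace inserts nothing
  std::cout << m[0] << "\n";  // prints "first"

  m[0] = "second";            // operator[] overwrites the existing value
  std::cout << m[0] << "\n";  // prints "second"
}
```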
@@ -20,6 +20,7 @@
 #include "common/ge/datatype_util.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
+#include "framework/common/types.h"
 #include "graph/anchor.h"
 #include "graph/ge_tensor.h"
 #include "graph/op_desc.h"
@@ -55,8 +56,10 @@ void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond
   loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
 }

-void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) {
+void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name,
+                                 uint32_t dynamic_model_id) {
   dynamic_model_name_ = dynamic_model_name;
+  dynamic_om_name_ = dynamic_om_name;
   dynamic_model_id_ = dynamic_model_id;
 }
@@ -200,6 +203,32 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
   return SUCCESS;
 }

+Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) {
+  if (dynamic_model_name_.empty() && dynamic_om_name_.empty()) {
+    GELOGI("Single op dump, no need set model name");
+    return SUCCESS;
+  }
+  std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
+  bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end();
+  bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end();
+  std::string dump_model_name = not_find_by_omname ? dynamic_model_name_ : dynamic_om_name_;
+  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
+    if (not_find_by_omname && not_find_by_modelname) {
+      std::string model_list_str;
+      for (auto &model : model_list) {
+        model_list_str += "[" + model + "].";
+      }
+      GELOGW("Model %s will not be set to dump, dump list: %s", dump_model_name.c_str(), model_list_str.c_str());
+      return FAILED;
+    }
+  }
+  if (!dump_model_name.empty() && dump_properties_.IsDumpOpen()) {
+    GELOGD("Dump model name is %s", dump_model_name.c_str());
+    op_mapping_info.set_model_name(dump_model_name);
+  }
+  return SUCCESS;
+}
+
 Status DumpOp::LaunchDumpOp() {
   GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str());
   int32_t device_id = 0;
@@ -209,8 +238,7 @@ Status DumpOp::LaunchDumpOp() {
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   if (device_id < 0) {
-    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
-           "Check device_id failed, device_id = %d, which should be not less than 0.",
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Check device_id failed, device_id = %d, which should be not less than 0.",
            device_id);
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
@@ -220,11 +248,12 @@ Status DumpOp::LaunchDumpOp() {
   op_mapping_info.set_flag(kAicpuLoadFlag);
   op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
   op_mapping_info.set_model_id(dynamic_model_id_);
-  if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
-    op_mapping_info.set_model_name(dynamic_model_name_);
+  if (SetDumpModelName(op_mapping_info) != SUCCESS) {
+    return SUCCESS;
   }
   SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
-  GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
+  GELOGI("Dump step is %s ,dump path is %s in Launch dump op", dump_properties_.GetDumpStep().c_str(),
          dump_path.c_str());
   uint32_t task_id = 0;
   uint32_t stream_id = 0;
@@ -273,4 +302,4 @@ Status DumpOp::LaunchDumpOp() {
   }
   return SUCCESS;
 }
-}  // namesapce ge
+}  // namespace ge
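The new `SetDumpModelName` resolves which name to report in the dump mapping: it prefers the om name when the configured dump list contains it, falls back to the model name otherwise, and signals the caller to skip the dump (without failing the launch) when neither name is listed and the dump-all wildcard is absent. A standalone sketch of that resolution logic, with `std::set` and plain strings in place of the GE types, and the `DUMP_ALL_MODEL` constant written as a string literal for illustration:

```cpp
#include <iostream>
#include <set>
#include <string>

// Returns the name to attach to the dump, or an empty string when this
// model is not covered by the configured dump list.
std::string ResolveDumpModelName(const std::set<std::string> &dump_list,
                                 const std::string &model_name,
                                 const std::string &om_name) {
  const bool not_found_by_om = dump_list.find(om_name) == dump_list.end();
  const bool not_found_by_model = dump_list.find(model_name) == dump_list.end();
  // Prefer the om name when the list knows it; otherwise use the model name.
  const std::string chosen = not_found_by_om ? model_name : om_name;
  if (dump_list.find("DUMP_ALL_MODEL") == dump_list.end() &&
      not_found_by_om && not_found_by_model) {
    return "";  // neither name is configured: caller should skip the dump
  }
  return chosen;
}

int main() {
  const std::set<std::string> dump_list = {"resnet50.om"};
  std::cout << ResolveDumpModelName(dump_list, "resnet50", "resnet50.om") << "\n";  // resnet50.om
  std::cout << ResolveDumpModelName(dump_list, "bert", "bert.om") << "\n";          // (empty)
}
```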
@@ -34,12 +34,13 @@ class DumpOp {
               vector<uintptr_t> output_addrs, rtStream_t stream);
   Status LaunchDumpOp();
   void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond);
-  void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id);
+  void SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, uint32_t dynamic_model_id);

  private:
   Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info);
   Status DumpOutput(aicpu::dump::Task &task);
   Status DumpInput(aicpu::dump::Task &task);
+  Status SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info);

   DumpProperties dump_properties_;
   OpDescPtr op_desc_;
@@ -54,6 +55,7 @@ class DumpOp {
   uintptr_t loop_cond_;
   std::string dynamic_model_name_;
+  std::string dynamic_om_name_;
   std::uint32_t dynamic_model_id_;
 };
 }  // namespace ge
@@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on";
 const uint32_t kAicoreOverflow = (0x1 << 0);
 const uint32_t kAtomicOverflow = (0x1 << 1);
 const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
-}
+}  // namespace

 namespace ge {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
   CopyFrom(other);
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=(
-  const DumpProperties &other) {
+    const DumpProperties &other) {
   CopyFrom(other);
   return *this;
 }
@@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti
 // The following is the new dump scenario of the fusion operator
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue(
-  const std::string &model, const std::set<std::string> &layers) {
+    const std::string &model, const std::set<std::string> &layers) {
   for (const std::string &layer : layers) {
     GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str());
   }
@@ -138,7 +138,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue(
-  const std::string &model) const {
+    const std::string &model) const {
   auto iter = model_dump_properties_map_.find(model);
   if (iter != model_dump_properties_map_.end()) {
     return iter->second;
@@ -147,8 +147,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump(
-  const std::string &model, const std::string &om_name, const std::string &op_name) const {
+    const std::string &model, const std::string &om_name, const std::string &op_name) const {
   // if dump all
+  GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str());
   if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) {
     return true;
   }
@@ -203,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch(
-  const std::string &dump_op_switch) {
+    const std::string &dump_op_switch) {
   dump_op_switch_ = dump_op_switch;
 }
@@ -270,4 +271,4 @@ void DumpProperties::SetDumpDebugOptions() {
     GELOGI("ge.exec.enableDumpDebug is false or is not set.");
   }
 }
-}  // namespace
+}  // namespace ge
@@ -80,13 +80,11 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
   uint32_t debug_stream_id = 0;
   uint32_t debug_task_id = 0;
-#ifdef ONLY_COMPILE_OPEN_SRC
   auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
-#endif
   GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
   data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
   return SUCCESS;
@@ -94,7 +92,6 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
 void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
   rtError_t rt_ret = RT_ERROR_NONE;
-#ifdef ONLY_COMPILE_OPEN_SRC
   if (stream != nullptr) {
     GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
     rt_ret = rtDebugUnRegisterForStream(stream);
@@ -102,8 +99,6 @@ void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
       GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", rt_ret);
     }
   }
-#endif
-
   if (op_debug_addr_ != nullptr) {
     rt_ret = rtFree(op_debug_addr_);
     if (rt_ret != RT_ERROR_NONE) {
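The two hunks above delete the `ONLY_COMPILE_OPEN_SRC` guards, so the runtime debug register/unregister calls are now compiled into every build rather than only open-source ones. A minimal sketch of what such a guard does (the macro name is taken from the diff; the functions are illustrative):

```cpp
#include <iostream>

// With the guard, the body is compiled only when ONLY_COMPILE_OPEN_SRC is
// defined (e.g. via -DONLY_COMPILE_OPEN_SRC); removing the guard, as the
// diff does, makes the call unconditional.
void RegisterDebugGuarded() {
#ifdef ONLY_COMPILE_OPEN_SRC
  std::cout << "register debug stream (open-source builds only)\n";
#endif
}

void RegisterDebugUnconditional() {
  std::cout << "register debug stream (all builds)\n";
}

int main() {
  RegisterDebugGuarded();        // prints nothing unless the macro is defined
  RegisterDebugUnconditional();  // always prints
}
```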
@@ -154,7 +154,8 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
   }
@@ -73,7 +73,8 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -94,7 +94,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -122,7 +123,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
                        args.data + src_idx * data_size, static_cast<size_t>(data_size));
       }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
@@ -95,7 +95,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -123,7 +124,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
                        args.data + src_idx * data_size, static_cast<size_t>(data_size));
       }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
@@ -139,7 +139,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -175,7 +176,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -189,7 +191,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -210,7 +213,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -246,7 +250,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
       ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                      static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
        return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
      }
    }
@@ -260,7 +265,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
       ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                      static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
        return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
      }
    }
@@ -274,14 +280,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -325,7 +333,8 @@ Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -333,7 +342,8 @@ Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -127,7 +127,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
             TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -173,8 +174,9 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
     }
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d", offset,
-           ret, need_pad_zero);
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d",
+           offset, ret, need_pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
 }
@@ -213,7 +215,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
             TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -235,7 +238,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
                      static_cast<size_t>(data_size));
     } else {
       if (protected_size < data_size) {
-        GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+        GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+               "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                protected_size, data_size);
         return ACL_ERROR_GE_PARAM_INVALID;
       }
@@ -247,7 +251,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
     }
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
           dst_offset, ret, pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
@@ -288,7 +293,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
             TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -310,7 +316,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
                      static_cast<size_t>(data_size));
     } else {
       if (protected_size < data_size) {
-        GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+        GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+               "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                protected_size, data_size);
         return ACL_ERROR_GE_PARAM_INVALID;
       }
@@ -322,7 +329,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
     }
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
           dst_offset, ret, pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
@@ -140,7 +140,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -179,7 +180,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -195,7 +197,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
         auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -217,7 +220,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -257,7 +261,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -273,7 +278,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
         auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
          return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -288,14 +294,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -339,7 +347,8 @@ Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -347,7 +356,8 @@ Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
           ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -66,7 +66,7 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) {
                 FmtToStr(ShapeToString(dst_shape));
     GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str());
     return ACL_ERROR_GE_SHAPE_INVALID;
-    }
+  }
   return SUCCESS;
 }
@@ -74,7 +74,8 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -59,9 +59,10 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) {
   }
   int64_t c1 = Ceil(dst_shape.at(kNchwC), c0);
   int64_t n0 = Ceil(dst_shape.at(kNchwN), static_cast<int64_t>(kNiSize));
-  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || src_shape.at(kFracZC0) != c0 ||
-      src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 ||
+      src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
           ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
     return ACL_ERROR_GE_SHAPE_INVALID;
   }
@@ -72,7 +73,8 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -59,9 +59,10 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) {
   }
   int64_t c1 = Ceil(dst_shape.at(kNhwcC), c0);
   int64_t n0 = Ceil(dst_shape.at(kNhwcN), static_cast<int64_t>(kNiSize));
-  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || src_shape.at(kFracZC0) != c0 ||
-      src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
-    GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 ||
+      src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
+    GELOGE(PARAM_INVALID,
+           "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
           ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
     return PARAM_INVALID;
   }
@@ -72,7 +73,8 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(OUT_OF_MEMORY,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return OUT_OF_MEMORY;
@@ -140,7 +142,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult &
   }
     GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size,
-        ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str());
+           ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str());
     return PARAM_INVALID;
   }
   GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld",
@@ -91,7 +91,8 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -72,7 +72,8 @@ Status CheckArgsForNc1hwc0ToNchw(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(OUT_OF_MEMORY,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return OUT_OF_MEMORY;
@@ -61,7 +61,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) {
   if (src_shape.at(kNc1hwc0H) != dst_shape.at(kNhwcH) || src_shape.at(kNc1hwc0W) != dst_shape.at(kNhwcW) ||
       src_shape.at(kNc1hwc0N) != dst_shape.at(kNhwcN) || src_shape.at(kNc1hwc0C0) != c0 ||
       src_shape.at(kNc1hwc0C1) != (Ceil(dst_shape.at(kNhwcC), c0))) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
           ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
     return ACL_ERROR_GE_SHAPE_INVALID;
   }
@@ -72,7 +73,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -125,7 +125,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) {
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   auto t1 = h_o * w_o;
   auto t2 = n_o * c_o;
-  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
+  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2),
+                  GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
@@ -140,7 +141,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -212,7 +214,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   auto t1 = h_o * w_o;
   auto t2 = n_o * c_o;
-  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
+  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2),
+                  GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,"int64 mul overflow.A[%ld], B[%ld]", t1, t2);
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
@@ -228,7 +231,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin
   dst.reset(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -275,7 +279,8 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult
   }
   std::vector<int64_t> expect_shape;
-  ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, args_tmp.dst_format, expect_shape);
+  ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type,
+                   args_tmp.dst_format, expect_shape);
   if (ret != SUCCESS) {
     return ret;
   }
@@ -92,7 +92,8 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
           TypeUtils::FormatToSerialString(args.src_format).c_str(),
           TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -87,12 +87,13 @@ Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) {
   std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr();
   if (model_task_def == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed");
-    return ACL_ERROR_GE_MEMORY_ALLOCATION;
+    GELOGD("SaveSizeToModelDef task_info_size is 0.");
+    om_info.push_back(0);
+  } else {
+    size_t partition_task_size = model_task_def->ByteSizeLong();
+    GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size);
+    om_info.push_back(partition_task_size);
   }
-  size_t partition_task_size = model_task_def->ByteSizeLong();
-  GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size);
-  om_info.push_back(partition_task_size);
   GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info),
                    GELOGE(FAILED, "SetListInt of om_info_list failed.");
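This hunk changes behavior, not just formatting: a model without a task definition used to abort the save with ACL_ERROR_GE_MEMORY_ALLOCATION; it now just records a task-info size of 0. A condensed sketch of the new logic (simplified, and assuming om_info is the size list assembled earlier in this function):

    // A missing task def is now a valid case: record size 0 instead of failing.
    std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr();
    size_t task_info_size = (model_task_def == nullptr) ? 0 : model_task_def->ByteSizeLong();
    om_info.push_back(task_info_size);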
@@ -598,6 +599,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {
       is_first_model = false;
       root_model_->SetRootGraph(GraphUtils::GetComputeGraph(cur_model->GetGraph()));
       root_model_->SetModelId(cur_model->GetModelId());
+      root_model_->SetModelName(cur_model->GetName());
       model_ = cur_model;
       continue;
     }
@@ -31,7 +31,7 @@ const char *const kFpPoint = "fp_point";
 const char *const kBpPoint = "bp_point";
 #ifdef DAVINCI_SUPPORT_PROFILING
-const size_t kReportMaxLen = 2048;
+const size_t kReportMaxLen = 1024;
 const int32_t kMaxDeviceNum = 256;
 const uint32_t kInteval = 2;
 const std::string kConfigNumsdev = "devNums";
@@ -15,6 +15,8 @@
  */
 #include "common/tbe_kernel_store.h"
+#include "graph/utils/attr_utils.h"
+#include "graph/debug/ge_attr_define.h"
 namespace ge {
@@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc>
     GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin),
                     GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");)
     GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize());
+    std::string atomic_kernel_name;
+    (void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name);
+    if (!atomic_kernel_name.empty()) {
+      GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str());
+      auto atomic_kernel_bin = FindKernel(atomic_kernel_name);
+      GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin),
+                      GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");)
+    }
   }
 }
 }
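Context for the TBEKernelStore change: ops that need their outputs cleared before accumulation carry a companion atomic kernel, saved into the model by the earlier "Save atomic kernel bin to model" commit; loading now attaches that kernel to the op descriptor alongside the main one. A hedged read-back sketch for the consumer side (the TryGetExtAttr accessor and this usage are illustrative, not part of the diff):

    // Hypothetical consumer: fetch both kernels from the op descriptor,
    // using the attribute keys written in the hunk above.
    auto kernel_bin = op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
    auto atomic_bin = op_desc->TryGetExtAttr(ge::EXT_ATTR_ATOMIC_TBE_KERNEL, TBEKernelPtr());
    if (atomic_bin != nullptr) {
      // Launch the atomic (memory-clearing) kernel before the main kernel.
    }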
@@ -8,6 +8,7 @@ set(PROTO_LIST
 )
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST})
 set(SRC_LIST
     "ge_executor.cc"
@@ -162,7 +163,7 @@ set(SRC_LIST
 )
 ######## libge_executor.a ########
-add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS})
+add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS})
 target_compile_options(ge_executor PRIVATE
     $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
@@ -191,7 +192,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE
     ${METADEF_DIR}/inc/external/graph
     ${METADEF_DIR}/inc/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_static
     #### yellow zone ####
     ${GE_CODE_DIR}/../inc
     ${GE_CODE_DIR}/../inc/cce
@@ -212,6 +213,7 @@ target_link_libraries(ge_executor PRIVATE
 add_library(ge_executor_shared SHARED ${SRC_LIST} ${PROTO_HDRS})
 target_compile_options(ge_executor_shared PRIVATE
+    -fno-common
     -Werror
     -O2
     -Wno-deprecated-declarations
@@ -30,6 +30,8 @@
 #include "single_op/single_op_manager.h"
 #include "graph/load/model_manager/davinci_model.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
+#include "graph/opsproto_manager.h"
+#include "ge_local_engine/engine/host_cpu_engine.h"
 using std::string;
 using std::vector;
@@ -199,6 +201,33 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,
 namespace ge {
 bool GeExecutor::isInit_ = false;
+static void InitOpsProtoManager() {
+  string opsproto_path;
+  const char *path_env = std::getenv("ASCEND_OPP_PATH");
+  if (path_env != nullptr) {
+    string path = path_env;
+    string file_path = RealPath(path.c_str());
+    if (file_path.empty()) {
+      GELOGE(FAILED, "[Check][EnvPath]ASCEND_OPP_PATH path [%s] is invalid.", path.c_str());
+      REPORT_INPUT_ERROR("E68016", {"ASCEND_OPP_PATH", path});
+      return;
+    }
+    opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
+    GELOGI("Get opsproto so path from env : %s", path.c_str());
+  } else {
+    string path_base = PluginManager::GetPath();
+    GELOGI("path_base is %s", path_base.c_str());
+    path_base = path_base.substr(0, path_base.rfind('/'));
+    path_base = path_base.substr(0, path_base.rfind('/') + 1);
+    opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
+  }
+  GELOGI("Get opsproto path is %s", opsproto_path.c_str());
+  OpsProtoManager *manager = OpsProtoManager::Instance();
+  map<string, string> option_tmp;
+  option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
+  (void)manager->Initialize(option_tmp);
+}
 GeExecutor::GeExecutor() {}
 Status GeExecutor::Initialize() {
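The executor now resolves the op-proto library path itself: ASCEND_OPP_PATH wins if set and valid, otherwise the path is derived from the plugin directory. An illustrative caller-side sketch (the install prefix below is an assumption, not taken from the diff):

    #include <cstdlib>
    // Point the executor at an Ascend opp directory before Initialize().
    // An invalid path is reported as input error E68016 and the proto
    // manager is simply left uninitialized.
    setenv("ASCEND_OPP_PATH", "/usr/local/Ascend/opp", 1);
    ge::GeExecutor executor;
    ge::Status status = executor.Initialize();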
@@ -208,6 +237,16 @@ Status GeExecutor::Initialize() {
     return ge::SUCCESS;
   }
+  OpTilingManager::GetInstance().LoadSo();
+  Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize();
+  if (init_hostcpu_engine_status != SUCCESS) {
+    GELOGE(init_hostcpu_engine_status, "Failed to initialize HostCpuEngine");
+    return init_hostcpu_engine_status;
+  }
+  InitOpsProtoManager();
   std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
   mem_type.push_back(RT_MEMORY_P2P_DDR);
   auto ret = MemManager::Instance().Initialize(mem_type);
@@ -20,6 +20,8 @@ set(OPS_KERNEL_SRC_LIST
 )
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+protobuf_generate(ge_ops_shared PROTO_OPS_SHARED_SRCS PROTO_OPS_SHARED_HDRS ${PROTO_LIST})
+protobuf_generate(ge_ops_static PROTO_OPS_STATIC_SRCS PROTO_OPS_STATIC_HDRS ${PROTO_LIST})
 ############ libge_local_engine.so ############
 add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
@@ -119,7 +121,7 @@ set_target_properties(atc_ge_local_engine PROPERTIES
 )
 ############ libge_local_opskernel_builder.so ############
-add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
+add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS})
 target_compile_options(ge_local_opskernel_builder PRIVATE
     -Werror
@@ -143,7 +145,7 @@ target_include_directories(ge_local_opskernel_builder PRIVATE
     ${METADEF_DIR}/inc/external/graph
     ${METADEF_DIR}/inc/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_ops_shared
     #### yellow zone ####
     ${GE_CODE_DIR}/../inc
     #### blue zone ####
@@ -166,7 +168,7 @@ target_link_libraries(ge_local_opskernel_builder PRIVATE
 )
 ############ atclib/libge_local_opskernel_builder.so ############
-add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
+add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS})
 target_compile_options(atc_ge_local_opskernel_builder PRIVATE
     -Werror
@@ -190,7 +192,7 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE
     ${METADEF_DIR}/inc/external/graph
     ${METADEF_DIR}/inc/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_ops_shared
     #### yellow zone ####
     ${GE_CODE_DIR}/../inc
     #### blue zone ####
@@ -218,7 +220,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
 )
 ############ libge_local_opskernel_builder.a ############
-add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
+add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_STATIC_HDRS})
 target_compile_options(ge_local_opskernel_builder_static PRIVATE
     -Werror
@@ -243,7 +245,7 @@ target_include_directories(ge_local_opskernel_builder_static PRIVATE
     ${METADEF_DIR}/inc/external/graph
     ${METADEF_DIR}/inc/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_ops_static
     #### yellow zone ####
     ${GE_CODE_DIR}/../inc
     #### blue zone ####
@@ -38,6 +38,7 @@ REGISTER_OP_CREATOR(ExpandDims, GeDeletedOp);
 REGISTER_OP_CREATOR(Reshape, GeDeletedOp);
 REGISTER_OP_CREATOR(ReFormat, GeDeletedOp);
 REGISTER_OP_CREATOR(Squeeze, GeDeletedOp);
+REGISTER_OP_CREATOR(Unsqueeze, GeDeletedOp);
 REGISTER_OP_CREATOR(Size, GeDeletedOp);
 REGISTER_OP_CREATOR(Shape, GeDeletedOp);
 REGISTER_OP_CREATOR(ShapeN, GeDeletedOp);
@@ -16,6 +16,7 @@ set(GE_SRC_LIST
     "task/label_goto_task.cc"
     "task/label_set_task.cc"
     "task/label_switch_task.cc"
+    "task/label_manager.cc"
 )
 add_library(ge_runtime SHARED ${GE_SRC_LIST})
@@ -23,26 +23,40 @@ LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::share
     : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info),
       task_info_(task_info),
       stream_(nullptr),
-      label_(nullptr) {
+      index_value_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
   auto stream_list = model_context.stream_list();
   auto label_list = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
   uint32_t stream_id = task_info->stream_id();
-  uint32_t label_id = task_info->label_id();
+  label_id_ = task_info->label_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
-  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id);
-  if (stream_id >= stream_list.size() || label_id >= label_list.size()) {
+  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id_);
+  if (stream_id >= stream_list.size() || label_id_ >= label_list.size()) {
     GELOGW("Stream/Label id invalid.");
     return;
   }
   stream_ = stream_list[stream_id];
-  label_ = label_list[label_id];
+  label_manager_ = LabelManager::GetInstance();
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
+  }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, {label_id_}, label_list);
 }
-LabelGotoTask::~LabelGotoTask() {}
+LabelGotoTask::~LabelGotoTask() {
+  if (index_value_ != nullptr) {
+    rtError_t rt_ret = rtFree(index_value_);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "rtFree index_value_ failed! ret: 0x%X.", rt_ret);
+    }
+    index_value_ = nullptr;
+  }
+}
 bool LabelGotoTask::Distribute() {
   GELOGI("LabelGotoTask Distribute start.");
@@ -50,11 +64,29 @@ bool LabelGotoTask::Distribute() {
     GELOGE(PARAM_INVALID, "stream is null!");
     return false;
   }
-  if (label_ == nullptr) {
-    GELOGE(PARAM_INVALID, "label is null!");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "label info is null!");
     return false;
   }
-  rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
+  if (index_value_ == nullptr) {
+    rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
+    uint64_t index = 0;
+    rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
+  }
+  void *label_info = label_info_->GetLabelInfo();
+  rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, 1, label_info, stream_);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return false;
@@ -65,6 +97,5 @@ bool LabelGotoTask::Distribute() {
 }
 REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
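The unconditional rtLabelGotoEx is replaced by a one-entry label switch: the task uploads a constant index 0 once, and rtLabelSwitchByIndex with a branch count of 1 then always jumps to the single label in the table built by LabelManager. A minimal sketch of the pattern in isolation (error handling elided; label_table stands in for the LabelManager-provided buffer):

    // goto L  ==  switch(idx) over the table [L] with idx pinned to 0.
    void *index_value = nullptr;
    rtMalloc(&index_value, sizeof(uint64_t), RT_MEMORY_HBM);
    uint64_t index = 0;
    rtMemcpy(index_value, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE);
    rtLabelSwitchByIndex(index_value, 1, label_table, stream);  // always takes entry 0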
@@ -18,7 +18,11 @@
 #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_
 #include <memory>
+#include <vector>
+#include <map>
+#include <mutex>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"
 namespace ge {
 namespace model_runner {
@@ -33,7 +37,11 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> {
  private:
   std::shared_ptr<LabelGotoTaskInfo> task_info_;
   void *stream_;
-  void *label_;
+  std::shared_ptr<LabelGuard> label_info_;
+  void *index_value_;
+  uint32_t label_id_;
+  rtModel_t rt_model_handle_;
+  std::shared_ptr<LabelManager> label_manager_;
 };
 }  // namespace model_runner
 }  // namespace ge
@@ -0,0 +1,119 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ge_runtime/task/label_manager.h"
+#include <algorithm>
+#include <string>
+#include "runtime/mem.h"
+#include "runtime/rt_model.h"
+#include "common/ge_inner_error_codes.h"
+#include "framework/common/debug/ge_log.h"
+namespace ge {
+namespace model_runner {
+std::weak_ptr<LabelManager> LabelManager::instance_;
+std::mutex LabelManager::instance_mutex_;
+template <class T>
+static std::string GetVectorString(const std::vector<T> &vec) {
+  std::string ret;
+  for (size_t i = 0; i < vec.size(); ++i) {
+    if (i != 0) {
+      ret.push_back(',');
+    }
+    ret += std::to_string(vec[i]);
+  }
+  return ret;
+}
+LabelGuard::~LabelGuard() {
+  void *label_info = GetLabelInfo();
+  if (label_info != nullptr) {
+    rtError_t rt_ret = rtFree(label_info);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "rtFree label_info failed! ret: 0x%X.", rt_ret);
+    }
+  }
+}
+std::shared_ptr<LabelManager> LabelManager::GetInstance() {
+  std::lock_guard<std::mutex> lock(instance_mutex_);
+  auto instance = instance_.lock();
+  if (instance != nullptr) {
+    return instance;
+  }
+  instance = std::make_shared<LabelManager>();
+  instance_ = instance;
+  return instance;
+}
+std::shared_ptr<LabelGuard> LabelManager::GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
+                                                       const std::vector<void *> &all_label) {
+  std::lock_guard<std::mutex> lock(model_info_mapping_mutex_);
+  rtError_t rt_ret;
+  auto model_iter = model_info_mapping_.find(model);
+  if (model_iter == model_info_mapping_.end()) {
+    model_info_mapping_.emplace(model, std::map<std::string, std::weak_ptr<LabelGuard>>());
+    model_iter = model_info_mapping_.find(model);
+  }
+  std::string label_id_str = GetVectorString(label_ids);
+  auto &label_map = model_iter->second;
+  auto label_iter = label_map.find(label_id_str);
+  if (label_iter != label_map.end()) {
+    auto label_guard = label_iter->second.lock();
+    if (label_guard != nullptr) {
+      GELOGI("model %p find same label id %s.", model, label_id_str.c_str());
+      return label_guard;
+    }
+  }
+  GELOGI("Alloc label id %s for model %p.", label_id_str.c_str(), model);
+  void *label_info;
+  std::vector<void *> label_list;
+  bool status = true;
+  std::transform(label_ids.begin(), label_ids.end(), std::back_inserter(label_list),
+                 [&all_label, &status](uint32_t idx) -> void * {
+                   if (idx >= all_label.size()) {
+                     GELOGE(PARAM_INVALID, "Invalid label id %u, all label list size %zu.", idx, all_label.size());
+                     status = false;
+                     return nullptr;
+                   }
+                   return all_label[idx];
+                 });
+  if (!status) {
+    GELOGE(PARAM_INVALID, "Get label info failed.");
+    return nullptr;
+  }
+  uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size();
+  rt_ret = rtMalloc(&label_info, label_info_size, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+  rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info, label_info_size);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+  auto label_guard = std::make_shared<LabelGuard>(label_info);
+  label_map.emplace(label_id_str, label_guard);
+  return label_guard;
+}
+}  // namespace model_runner
+}  // namespace ge
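Design note on the new manager: device-side label tables are cached per model, keyed by the serialized label-id list, and held through weak_ptrs, so a table is rtFree'd (by ~LabelGuard) as soon as the last task using it releases its guard; the manager itself is a weak_ptr singleton with the same lifetime behavior. An illustrative use (model and all_labels are placeholders):

    // Two tasks asking for the same label set share one device buffer.
    auto manager = LabelManager::GetInstance();
    auto guard_a = manager->GetLabelInfo(model, {3}, all_labels);  // allocates + rtLabelListCpy
    auto guard_b = manager->GetLabelInfo(model, {3}, all_labels);  // cache hit: same buffer
    // guard_a->GetLabelInfo() == guard_b->GetLabelInfo(); the buffer is
    // freed once both shared_ptrs are released.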
@@ -0,0 +1,54 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
#define GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_

#include <vector>
#include <memory>
#include <mutex>
#include <map>
#include <runtime/base.h>

namespace ge {
namespace model_runner {
class LabelGuard {
 public:
  explicit LabelGuard(void *label_info) : label_info_(reinterpret_cast<uintptr_t>(label_info)) {}
  ~LabelGuard();
  void *GetLabelInfo() { return reinterpret_cast<void *>(label_info_); }

 private:
  uintptr_t label_info_;
};

class LabelManager {
 public:
  static std::shared_ptr<LabelManager> GetInstance();
  std::shared_ptr<LabelGuard> GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
                                           const std::vector<void *> &all_label);

 private:
  std::mutex model_info_mapping_mutex_;
  std::map<rtModel_t, std::map<std::string, std::weak_ptr<LabelGuard>>> model_info_mapping_;

  static std::weak_ptr<LabelManager> instance_;
  static std::mutex instance_mutex_;
};
}  // namespace model_runner
}  // namespace ge
#endif  // GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
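The header stores the singleton as a static std::weak_ptr rather than a static shared_ptr, so the manager (and the per-model label map it owns) is destroyed as soon as the last task releases its reference, instead of living for the whole process. A reduced sketch of the pattern, assuming the same static-mutex locking (Manager is an illustrative name):

#include <memory>
#include <mutex>

class Manager {
 public:
  static std::shared_ptr<Manager> GetInstance() {
    std::lock_guard<std::mutex> lock(mutex_);
    auto alive = instance_.lock();
    if (alive == nullptr) {               // recreate after the last user let go
      alive = std::make_shared<Manager>();
      instance_ = alive;
    }
    return alive;
  }

 private:
  static std::weak_ptr<Manager> instance_;  // does not keep the instance alive
  static std::mutex mutex_;
};

std::weak_ptr<Manager> Manager::instance_;
std::mutex Manager::mutex_;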
@@ -24,14 +24,14 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     : TaskRepeater<LabelSwitchTaskInfo>(model_context, task_info),
       task_info_(task_info),
       stream_(nullptr),
-      all_label_resource_(),
       label_info_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
-  all_label_resource_ = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
+  auto all_label_resource = model_context.label_list();
   auto stream_list = model_context.stream_list();
   uint32_t stream_id = task_info->stream_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
@@ -40,52 +40,24 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     return;
   }
   stream_ = stream_list[stream_id];
-}
-
-LabelSwitchTask::~LabelSwitchTask() {
-  if (label_info_ != nullptr) {
-    rtError_t rt_ret = rtFree(label_info_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", rt_ret);
-    }
-    label_info_ = nullptr;
+  label_manager_ = LabelManager::GetInstance();
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
   }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, task_info_->label_list(), all_label_resource);
 }

+LabelSwitchTask::~LabelSwitchTask() {}
+
 bool LabelSwitchTask::Distribute() {
   GELOGI("LabelSwitchTask Distribute start.");
   if (!CheckParamValid()) {
     return false;
   }
-  const std::vector<uint32_t> &label_index_list = task_info_->label_list();
-  std::vector<void *> label_list(task_info_->label_size(), nullptr);
-
-  for (size_t i = 0; i < task_info_->label_size(); ++i) {
-    uint32_t label_index = label_index_list[i];
-    if (label_index >= all_label_resource_.size()) {
-      GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index,
-             all_label_resource_.size());
-      return false;
-    }
-    label_list[i] = all_label_resource_[label_index];
-    GELOGI("Case %zu: label id %zu.", i, label_index);
-  }
-
-  uint32_t label_info_size = sizeof(rtLabelDevInfo) * task_info_->label_size();
-  rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
-    return false;
-  }
-
-  rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
-    return false;
-  }
-
-  rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_);
+  void *label_info = label_info_->GetLabelInfo();
+  rtError_t rt_ret = rtLabelSwitchByIndex(task_info_->cond(), task_info_->label_size(), label_info, stream_);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return false;
@@ -117,8 +89,8 @@ bool LabelSwitchTask::CheckParamValid() {
     return false;
   }

-  if (label_info_ != nullptr) {
-    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "CopyLabelList failed, label info is null.");
     return false;
   }

@@ -126,6 +98,5 @@ bool LabelSwitchTask::CheckParamValid() {
 }

 REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
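Note the destructor is now empty: ownership of the rtMalloc'ed buffer moved into the shared LabelGuard, whose destructor calls rtFree exactly once when the last LabelSwitchTask sharing the same label list drops its reference. The hand-off amounts to the custom-deleter idiom sketched here (FreeFn is a placeholder standing in for rtFree; error handling elided):

#include <memory>

using FreeFn = void (*)(void *);

// The returned shared_ptr plays LabelGuard's role: whichever task releases
// the last copy triggers the deleter, so the buffer cannot be freed twice.
std::shared_ptr<void> MakeGuard(void *dev_ptr, FreeFn free_fn) {
  return std::shared_ptr<void>(dev_ptr, free_fn);
}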
@@ -19,6 +19,7 @@
 #include <memory>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"

 namespace ge {
 namespace model_runner {
@@ -35,8 +36,9 @@ class LabelSwitchTask : public TaskRepeater<LabelSwitchTaskInfo> {
   std::shared_ptr<LabelSwitchTaskInfo> task_info_;
   void *stream_;
-  std::vector<void *> all_label_resource_;
-  void *label_info_;
+  rtModel_t rt_model_handle_;
+  std::shared_ptr<LabelGuard> label_info_;
+  std::shared_ptr<LabelManager> label_manager_;
 };
 }  // namespace model_runner
 }  // namespace ge
@@ -50,9 +50,13 @@ const char *const kFileNameSuffix = "online";
 const char *const kAicpuAllshape = "_AllShape";
 constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
 const int64_t kDynamicDimValue = -2;
+const int kDefaultDeviceId = 0;
+const int kDefaultJobId = 0;

 std::map<ge::OpEngineType, std::string> engine_type_map{
-    {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}};
+    {ge::ENGINE_SYS, kEngineNameDefault},
+    {ge::ENGINE_AICORE, kAIcoreEngine},
+    {ge::ENGINE_VECTOR, kVectorEngine}};

 bool ContainsDynamicInpus(const ge::OpDesc &op_desc) {
   for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) {
@@ -63,6 +67,9 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) {
   }
   return false;
 }
+
+bool IsOptional(const ge::GeTensorDesc &tensor_desc) {
+  return tensor_desc.GetFormat() == ge::FORMAT_RESERVED && tensor_desc.GetDataType() == ge::DT_UNDEFINED;
+}
 }  // namespace

 namespace ge {
@@ -83,8 +90,9 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty
   } else {
     ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"},
                                                     {op_desc->GetName(), op_desc->GetType(), "engine type",
-                                                     "it only support kEngineNameDefault/kAIcoreEngine/kVectorEngine"});
-    GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type));
+                                                     "it only support default/AIcoreEngine/VectorEngine"});
+    GELOGE(FAILED, "[Check][EngineType]value:%d not support, "
+           "only support default/AIcoreEngine/VectorEngine now", static_cast<int>(engine_type));
     return FAILED;
   }
@@ -149,7 +157,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty
 }

 static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index,
-                        bool attr) {
+                        bool attr, int32_t &data_index) {
   GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
   GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
@@ -188,17 +196,21 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
   (void)AttrUtils::SetBool(data_op, "_is_single_op", true);

-  GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
-  GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail.");
-  if (attr) {
-    GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, "Set index fail.");
+  GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
+                   "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str());
+  GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
+                   "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str());
+  if (attr && !is_const) {
+    GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED,
+                     "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str());
+    ++data_index;
   }

   ge::NodePtr arg_node = graph->AddNode(data_op);
-  GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail.");
+  GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail");

   GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)),
-                "Add edge[%s->%s] fail.", data_op->GetName().c_str(), node->GetName().c_str());
+                "[Add][Edge]fail from node:%s to node:%s", data_op->GetName().c_str(), node->GetName().c_str());

   return SUCCESS;
 }
@@ -213,20 +225,23 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons
   for (const auto &out_desc : outputs) {
     GeTensorDesc tensor = out_desc.GetTensorDesc();
     TensorUtils::SetInputTensor(tensor, true);
-    GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail");
+    GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
+                     "[Add][InputDesc]fail for node:%s", op_desc->GetName().c_str());

     TensorUtils::SetInputTensor(tensor, false);
     TensorUtils::SetOutputTensor(tensor, true);
-    GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail");
+    GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
+                     "[Add][OutputDesc]fail for node:%s", op_desc->GetName().c_str());
     count++;
   }
   GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
   ge::NodePtr out_node = graph->AddNode(op_desc);
-  GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, "Insert Output node fail.");
+  GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED,
+                   "[Add][Node:%s]fail in graph:%u", op_desc->GetName().c_str(), graph->GetGraphID());
   GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
   for (int32_t i = 0; i < count; ++i) {
     GE_CHK_STATUS(GraphUtils::AddEdge(node->GetOutDataAnchor(i), out_node->GetInDataAnchor(i)),
-                  "Add edge[%s->%s] fail.", node->GetName().c_str(), out_node->GetName().c_str());
+                  "[Add][Edge]fail from node:%s to node:%s", node->GetName().c_str(), out_node->GetName().c_str());
   }

   return SUCCESS;
@@ -554,6 +569,44 @@ bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) {
   return true;
 }
+Status GeGenerator::SetModelNameForDump(const GeRootModelPtr &ge_root_model) {
+  bool is_unknown_shape = false;
+  Status ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape);
+  if (ret != SUCCESS) {
+    GELOGE(FAILED, "[Check][IsUnknownShape]Check root model is unknown shape failed, model id:%u",
+           ge_root_model->GetModelId());
+    REPORT_CALL_ERROR("E19999", "Check root model is unknown shape failed, model id:%u",
+                      ge_root_model->GetModelId());
+    return FAILED;
+  }
+  GeModelPtr model_root = nullptr;
+  if (is_unknown_shape) {
+    model_root = MakeShared<GeModel>();
+    GE_CHECK_NOTNULL(model_root);
+    model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph()));
+    ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root);
+  }
+  ModelHelper model_helper;
+  string model_name;
+  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
+  Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
+                                                                 model_name);
+  if (name_ret != SUCCESS) {
+    ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
+    GELOGE(FAILED, "[Check][GetModelNameStep]Get model_name failed. Param --output is invalid, root graph name: %s",
+           ge_root_model->GetRootGraph()->GetName().c_str());
+    REPORT_CALL_ERROR("E19999", "Get model_name failed. Param --output is invalid, root graph name: %s",
+                      ge_root_model->GetRootGraph()->GetName().c_str());
+    return PARAM_INVALID;
+  }
+  map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
+  GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
+  GE_CHECK_NOTNULL(ge_model);
+  ge_model->SetName(model_name);
+  return SUCCESS;
+}
 Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
                                   ModelBufferData &model, bool is_offline) {
   rtContext_t ctx = nullptr;
@@ -588,20 +641,10 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
   }

   GE_CHECK_NOTNULL(ge_root_model);
-  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
-  ModelHelper model_helper;
-  string model_name = "";
-  Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
-                                                                 model_name);
-  if (name_ret != SUCCESS) {
-    ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
-    GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
-    return PARAM_INVALID;
+  ret = SetModelNameForDump(ge_root_model);
+  if (ret != SUCCESS) {
+    return ret;
   }
-  map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
-  GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
-  GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
-  ge_model->SetName(model_name);
   ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
   if (ret != SUCCESS) {
     GELOGE(ret, "Save model failed");
@@ -652,6 +695,34 @@ namespace {
   }
   return SUCCESS;
 }
+
+bool CheckNoAicore(const ComputeGraphPtr &graph) {
+  for (const auto &node : graph->GetDirectNode()) {
+    if (node == nullptr) {
+      continue;
+    }
+    auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr) {
+      continue;
+    }
+    if (op_desc->GetOpEngineName() == kAIcoreEngine) {
+      return false;
+    }
+  }
+  return true;
+}
+}
+
+void GeGenerator::RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
+  for (auto &input : inputs) {
+    GeTensorDesc input_desc = input.GetTensorDesc();
+    bool is_const = false;
+    (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
+    bool is_optional = IsOptional(input_desc);
+    if (!is_optional && !is_const) {
+      outputs.emplace_back(input);
+    }
+  }
 }
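RemoveConst above keeps only real data inputs: weights marked const and optional placeholders are dropped before BuildModel, so they never become Data nodes. The filtering rule, restated as a standalone predicate (the struct is a stand-in for GeTensorDesc, not a GE type):

#include <vector>

struct InputDesc {
  bool is_const = false;     // corresponds to CONST_ATTR_NAME_INPUT
  bool is_optional = false;  // FORMAT_RESERVED + DT_UNDEFINED, see IsOptional()
};

std::vector<InputDesc> KeepDataInputs(const std::vector<InputDesc> &inputs) {
  std::vector<InputDesc> kept;
  for (const auto &in : inputs) {
    if (!in.is_const && !in.is_optional) {
      kept.push_back(in);  // only genuine data inputs feed Data nodes
    }
  }
  return kept;
}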
 Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
@@ -710,7 +781,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
     auto node = comp_graph->FindNode(op_desc->GetName());
     Status ret = CheckEngineTypeSupport(node, engine_type);
     if (ret != SUCCESS) {
-      GELOGE(ret, "check engine type failed.");
+      GELOGE(ret, "[Check][EngineType]value:%d for node:%s not support", engine_type, node->GetName().c_str());
       return ret;
     }
   }
@@ -718,7 +789,9 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
   GELOGI("ATC parser success in single op build.");

   GeRootModelPtr ge_root_model = nullptr;
-  GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model));
+  vector<GeTensor> data_inputs;
+  RemoveConst(inputs, data_inputs);
+  GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, data_inputs, ge_root_model));
   map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs();
   GE_CHECK_NOTNULL(ge_root_model);
   GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
@@ -734,7 +807,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
   bool all_shape = false;
   (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape);
-  if (all_shape) {
+  if (all_shape && CheckNoAicore(root_graph)) {
     GELOGD("Get aicpu all_shape kernel!");
     vector<GeTensor> inputs_dynamic;
     vector<GeTensor> outputs_dynamic;
@@ -801,18 +874,19 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor
   // 2. Create InputData node.
   int32_t arg_index = 0;
+  int32_t data_index = 0;
   if (inputs.empty()) {
     for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) {
       GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR);
       if (!IsNeedConnectInputOpForSingleOp(*input_desc)) {
         continue;
       }
-      GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false));
+      GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false, data_index));
       arg_index++;
     }
   } else {
     for (const auto &in_desc : inputs) {
-      GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true));
+      GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true, data_index));
       arg_index++;
     }
   }
@@ -871,13 +945,12 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo
                  "ge root model has no sub model")
   GeModelPtr model_root = nullptr;
   if (is_unknown_shape) {
-    model_root = make_shared<GeModel>();
-    model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph()));
-    ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root);
-    model_root->SetName(ge_root_model->GetRootGraph()->GetName());
+    auto name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
+    model_root = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
   } else {
     model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second;
   }
+  GE_CHECK_NOTNULL(model_root);
   // set atc version
   if (!SetAtcVersionInfo(*(model_root.get()))) {
     GELOGW("SetPackageVersionInfo of atc failed!");
@@ -915,6 +988,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
   static std::atomic<uint64_t> atomic_session_id(0);
   auto session_id = atomic_session_id.fetch_add(1);
+  // This is a temporary add for graph with variable
+  auto version = static_cast<int32_t>(SessionVersion::ClOUD_VERSION);
+  ret = VarManager::Instance(session_id)->Init(version, session_id, kDefaultDeviceId, kDefaultJobId);
+  GELOGI("Start init var instance, session_id %lu", session_id);
+  if (ret != SUCCESS) {
+    GELOGW("Failed init var instance, session_id %lu", session_id);
+  }
   if (is_singleop_unregistered_) {
     ret = graph_manager_.BuildGraphForUnregisteredOp(graph_id, inputs, ge_root_model, session_id);
   } else {
@@ -382,54 +382,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
   return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
 }

-static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
-                               const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
-  GE_CHECK_NOTNULL(out_anchor);
-  NodePtr in_node = out_anchor->GetOwnerNode();
-  GE_CHECK_NOTNULL(in_node);
-  OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
-  OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
-                          .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
-                          .Build();
-  (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
-  if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
-    GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
-    return FAILED;
-  }
-  return SUCCESS;
-}
-
-static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
-  for (auto &node : graph->GetDirectNode()) {
-    // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
-    auto op_desc = node->GetOpDesc();
-    if (op_desc == nullptr) {
-      continue;
-    }
-    auto op_type = op_desc->GetType();
-    if (op_type == NETOUTPUT) {
-      for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
-        const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
-        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
-        NodePtr in_node = peer_out_anchor->GetOwnerNode();
-        GE_CHECK_NOTNULL(in_node);
-        std::string in_node_op_type = in_node->GetType();
-        if (in_node_op_type == CONSTANT) {
-          GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
-          std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
-          if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
-            GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
-                   in_node->GetName().c_str(), node->GetName().c_str());
-            return FAILED;
-          }
-        }
-      }
-    }
-  }
-  return SUCCESS;
-}
-
 Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
   bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
   com_graph->SetGraphUnknownFlag(false);
@@ -512,9 +464,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
         !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
       continue;
     }
-
-    GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");
-
     if (sub_graph->GetGraphUnknownFlag()) {
       // unknown shape build flow
       GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
@@ -33,13 +33,21 @@ using std::queue;
 namespace ge {
 LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {}

-const string &LogicalStreamPass::GetName() const { return name_; }
+const string &LogicalStreamPass::GetName() const {
+  return name_;
+}

-bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { return subgraph.engine_conf.skip_assign_stream; }
+bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const {
+  return subgraph.engine_conf.skip_assign_stream;
+}

-bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { return subgraph.engine_conf.attach; }
+bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const {
+  return subgraph.engine_conf.attach;
+}

-bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { return subgraph.engine_conf.independent; }
+bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const {
+  return subgraph.engine_conf.independent;
+}

 bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const {
   return !subgraph.subgraph_info.GetStreamLabel().empty();
@@ -60,14 +68,14 @@ Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &
       // Subgraphs of the same stream_label are assigned to the same stream,
       // and different stream_labels are assigned new streams.
       auto iter = label_streams.find(stream_label);
-      if (iter != label_streams.end()) {
-        subgraph->stream_id = iter->second;
-      } else {
+      if (iter == label_streams.end()) {
         subgraph->stream_id = next_stream;
         GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str());
         label_streams.emplace(stream_label, next_stream);
-        ++next_stream;
+        next_stream++;
+      } else {
+        subgraph->stream_id = iter->second;
       }
       changed = true;
     }
@@ -92,15 +100,15 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt
     const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
     auto &label_streams = engine_streams[engine];
     auto iter = label_streams.find(stream_label);
-    if (iter != label_streams.end()) {
-      subgraph->stream_id = iter->second;
-    } else {
+    if (iter == label_streams.end()) {
       subgraph->stream_id = next_stream;
       GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(),
              stream_label.c_str());
       label_streams.emplace(stream_label, next_stream);
-      ++next_stream;
+      next_stream++;
+    } else {
+      subgraph->stream_id = iter->second;
     }
     changed = true;
   }
@@ -121,7 +129,9 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP
     }

     SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map);
-    if (reusable_subgraph != nullptr) {
+    if (reusable_subgraph == nullptr) {
+      (void)AssignNewStream(subgraph);
+    } else {
       if (HasAssignedStream(*reusable_subgraph)) {
         subgraph->stream_id = reusable_subgraph->stream_id;
       } else {
@@ -140,8 +150,6 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP
       GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(),
              subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(),
              reusable_subgraph->engine_conf.id.c_str());
-    } else {
-      (void)AssignNewStream(subgraph);
     }
     changed = true;
   }
@@ -191,13 +199,15 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr
     auto iter = pld_subgraph_map.find(end_pld_pair.second);
     if (iter != pld_subgraph_map.end()) {
       const SubgraphPtr &pred_subgraph_succ = iter->second;
-      if (pred_subgraph_succ != subgraph && pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id) {
+      if ((pred_subgraph_succ != subgraph) &&
+          (pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id)) {
         return false;
       }
     }
   }

-  if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || IsEngineAttach(*subgraph)) {
+  if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) ||
+      IsEngineAttach(*subgraph)) {
     return true;
   }

@@ -406,7 +416,7 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vector<Subgr
       auto op_desc = node->GetOpDesc();
       GE_CHECK_NOTNULL(op_desc);
       auto stream_id = op_desc->GetStreamId();
-      if (stream_id != kInvalidStream && !HasStreamLabel(*subgraph)) {
+      if ((stream_id != kInvalidStream) && !HasStreamLabel(*subgraph)) {
         ops_without_label.emplace(op_desc);
       }
     }
@@ -463,7 +473,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt
   for (const NodePtr &node : graph->GetDirectNode()) {
     if (!IsHcomNode(node->GetType()) ||
-        node->GetInDataNodes().size() <= 1) {
+        (node->GetInDataNodes().size() <= 1)) {
       continue;
     }
@@ -575,7 +585,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap
   GE_CHECK_NOTNULL(graph);

   NodePtr parent_node = graph->GetParentNode();
-  if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) {
+  if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) {
     context_.default_stream = kInvalidStream;
   } else {
     context_.default_stream = parent_node->GetOpDesc()->GetStreamId();
@@ -597,7 +607,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap
     return status;
   }

-  GELOGD("Subgraphs of graph %s:", graph->GetName().c_str());
+  GELOGD("Subgraphs of graph %s", graph->GetName().c_str());
   for (const auto &subgraph : subgraphs) {
     if (subgraph != nullptr) {
       GELOGD("subgraph: %s", subgraph->name.c_str());
@@ -686,7 +696,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
     auto op_desc = node->GetOpDesc();
     if (op_desc != nullptr) {
       int64_t stream_id = op_desc->GetStreamId();
-      if (stream_id != kInvalidStream && stream_id < stream_num) {
+      if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
         stream_has_node[stream_id] = true;
       }
     }
@@ -695,10 +705,10 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
   context_.next_stream = 0;
   vector<int64_t> old_to_new_streams(stream_num, kInvalidStream);
-  for (size_t old_stream = 0; old_stream < stream_has_node.size(); ++old_stream) {
+  for (size_t old_stream = 0; old_stream < stream_has_node.size(); old_stream++) {
     if (stream_has_node[old_stream]) {
       old_to_new_streams[old_stream] = context_.next_stream;
-      ++context_.next_stream;
+      context_.next_stream++;
     }
   }

@@ -706,7 +716,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
     auto op_desc = node->GetOpDesc();
     if (op_desc != nullptr) {
       int64_t stream_id = op_desc->GetStreamId();
-      if (stream_id != kInvalidStream && stream_id < stream_num) {
+      if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
         op_desc->SetStreamId(old_to_new_streams[stream_id]);
       }
     }
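The RefreshContinuousStreams hunks above implement stream-id compaction: streams that ended up with no nodes are dropped and the survivors are renumbered densely from 0, via the old_to_new_streams table. A standalone sketch of just that remap (kInvalidStream = -1 is an assumption about the sentinel used by the allocator):

#include <cstdint>
#include <vector>

std::vector<int64_t> CompactStreams(const std::vector<bool> &stream_has_node) {
  const int64_t kInvalidStream = -1;  // assumed sentinel value
  std::vector<int64_t> old_to_new(stream_has_node.size(), kInvalidStream);
  int64_t next_stream = 0;
  for (size_t old_stream = 0; old_stream < stream_has_node.size(); old_stream++) {
    if (stream_has_node[old_stream]) {
      old_to_new[old_stream] = next_stream++;  // {used, empty, used} -> {0, -1, 1}
    }
  }
  return old_to_new;  // each node then rewrites its stream id through this table
}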
@@ -70,7 +70,10 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
     return SUCCESS;
   }
   if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) {
-    GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front());
+    GELOGE(FAILED, "[Check][MemRangeStep]first mem_range_step:%ld less than 0,invalid,"
+           "maybe has dynamic shape in graph", all_memory_size.front());
+    REPORT_INNER_ERROR("E19999", "first mem_range_step:%ld less than 0,invalid,"
+                       "maybe has dynamic shape in graph", all_memory_size.front());
     return FAILED;
   }
   // Memory size is 512 aligned, so it is not necessary to take less than 512
@@ -81,12 +84,18 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
   GELOGD("Range number: %zu", range_number);

   vector<vector<int64_t>> ranges(range_number);
-  GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
+  GE_CHK_BOOL_EXEC((range_number != 0),
+                   REPORT_INNER_ERROR("E19999", "inner data[range_number] is 0, judge invalid");
+                   return PARAM_INVALID,
+                   "[Check][RangeNumber]inner data is 0, judge invalid.");
   size_t range_number_limit = all_memory_size.size() / range_number;
   int64_t range_ceil = min_memory_size;
   for (size_t i = 1; i <= range_number; i++) {
     GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
-                    GELOGE(FAILED, "Multiply result is out of range.");
+                    GELOGE(FAILED, "[Check][MemRangeCeil]Multiply result is out of range,"
+                           "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
+                    REPORT_INNER_ERROR("E19999", "process mem_range_ceil,multiply result out of range,"
+                                       "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
                     return FAILED);
     range_ceil *= kRangeCeilInterval;  // The block size of each interval is doubled every time.
     for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
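The loop above grows each range ceiling geometrically: starting from the smallest (512-aligned) tensor size, every interval's ceiling is multiplied by kRangeCeilInterval, so tensor sizes are bucketed into exponentially widening block ranges. A small sketch of just that computation (kInterval = 2 is an assumption about kRangeCeilInterval's value; overflow checks elided):

#include <cstdint>
#include <vector>

std::vector<int64_t> RangeCeils(int64_t min_memory_size, size_t range_number) {
  const uint32_t kInterval = 2;  // assumed value of kRangeCeilInterval
  std::vector<int64_t> ceils;
  int64_t range_ceil = min_memory_size;
  for (size_t i = 1; i <= range_number; i++) {
    range_ceil *= kInterval;  // 512 -> 1024 -> 2048 -> ... for a 512 minimum
    ceils.push_back(range_ceil);
  }
  return ceils;
}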
@@ -30,6 +30,7 @@ | |||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
#include "graph/utils/op_desc_utils.h" | #include "graph/utils/op_desc_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/utils/type_utils.h" | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
@@ -457,7 +458,16 @@ Status GetNoAlignSize(const ge::OpDesc &desc, uint32_t index, size_t &size) { | |||||
DataType data_type = output_op_desc->GetDataType(); | DataType data_type = output_op_desc->GetDataType(); | ||||
graphStatus graph_status = TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | graphStatus graph_status = TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | ||||
if (graph_status != GRAPH_SUCCESS) { | if (graph_status != GRAPH_SUCCESS) { | ||||
GELOGE(graph_status, "CalcTensorMemSize failed!"); | |||||
GELOGE(graph_status, "[Calculate][TensorSize]shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", | |||||
shape.ToString().c_str(), | |||||
TypeUtils::FormatToSerialString(format).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||||
desc.GetName().c_str(), index); | |||||
REPORT_CALL_ERROR("E19999", "CalcTensorMemSize fail, shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", | |||||
shape.ToString().c_str(), | |||||
TypeUtils::FormatToSerialString(format).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||||
desc.GetName().c_str(), index); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
size = static_cast<size_t>(tensor_size); | size = static_cast<size_t>(tensor_size); | ||||
@@ -586,8 +596,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | ||||
int64_t size = 0; | int64_t size = 0; | ||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | ||||
GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", | |||||
node_op_desc->GetName().c_str(), size); | |||||
GE_IF_BOOL_EXEC(size < 0, | |||||
GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, " | |||||
"maybe it is unknown shape node, Node_name:%s", | |||||
size, node_op_desc->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, " | |||||
"maybe it is unknown shape node, Node_name:%s", | |||||
size, node_op_desc->GetName().c_str()); | |||||
return;); | return;); | ||||
batch_all_memory_size[batch_label].emplace_back(size); | batch_all_memory_size[batch_label].emplace_back(size); | ||||
if (batch_total_size.find(batch_label) == batch_total_size.end()) { | if (batch_total_size.find(batch_label) == batch_total_size.end()) { | ||||
@@ -678,22 +693,34 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | ||||
auto out_anchor = n->GetOutDataAnchor(out_index); | auto out_anchor = n->GetOutDataAnchor(out_index); | ||||
GE_IF_BOOL_EXEC(out_anchor == nullptr, | GE_IF_BOOL_EXEC(out_anchor == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] anchor is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] anchor is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { | for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { | ||||
GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, | GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] peer_in_anchor 0 is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] peer_in_anchor 0 is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
auto peer_node = peer_in_anchor->GetOwnerNode(); | auto peer_node = peer_in_anchor->GetOwnerNode(); | ||||
GE_IF_BOOL_EXEC(peer_node == nullptr, | GE_IF_BOOL_EXEC(peer_node == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] node is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][Node]Node[%s] output[%u] peer node is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
// Get the continuous input type of the node, default is false | // Get the continuous input type of the node, default is false | ||||
bool is_input_continuous = false; | bool is_input_continuous = false; | ||||
auto peer_in_node_desc = peer_node->GetOpDesc(); | auto peer_in_node_desc = peer_node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] nodedesc is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
// If GetBool fail, is_input_continuous is false. | // If GetBool fail, is_input_continuous is false. | ||||
@@ -793,7 +820,10 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr & | |||||
if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || | if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || | ||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | ||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | ||||
GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false; | return false; | ||||
} | } | ||||
auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); | auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); | ||||
@@ -1077,7 +1107,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | ||||
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | ||||
const bool continuous, int64_t memory_type) { | const bool continuous, int64_t memory_type) { | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input parameter n is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
n == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); | |||||
return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null."); | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); | ||||
std::string batch_label; | std::string batch_label; | ||||
@@ -1129,7 +1162,12 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); | auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
block == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u", | |||||
n->GetName().c_str(), out_index); | |||||
return nullptr, | |||||
"[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index); | |||||
// Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
@@ -1188,9 +1226,15 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu | |||||
Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | ||||
const bool is_op_reuse_mem) { | const bool is_op_reuse_mem) { | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
n == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null"); | |||||
return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null."); | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
node_op_desc == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||||
return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||||
// continuous output support ref only when all output ref input | // continuous output support ref only when all output ref input | ||||
bool isAllOutputRef = true; | bool isAllOutputRef = true; | ||||
@@ -1204,7 +1248,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
} | } | ||||
if (!isAllOutputRef && isOutputHasRef) { | if (!isAllOutputRef && isOutputHasRef) { | ||||
GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", | |||||
REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s", | |||||
n->GetName().c_str()); | |||||
GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s", | |||||
n->GetName().c_str()); | n->GetName().c_str()); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
@@ -1215,7 +1261,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | ||||
auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
if (output_op_desc == nullptr) { | if (output_op_desc == nullptr) { | ||||
GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", | |||||
n->GetName().c_str(), index); | |||||
GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
@@ -1226,7 +1274,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
int64_t size = 0; | int64_t size = 0; | ||||
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | ||||
GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u", | |||||
n->GetName().c_str(), index); | |||||
GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
size_t align_size = static_cast<size_t>(size); | size_t align_size = static_cast<size_t>(size); | ||||
@@ -1266,7 +1316,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
block->last_continuous_block_ = true; | block->last_continuous_block_ = true; | ||||
++(block->ref_count_); | ++(block->ref_count_); | ||||
} else { | } else { | ||||
GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | |||||
REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld", | |||||
n->GetName().c_str(), total_size); | |||||
GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -1274,25 +1326,44 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
 MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges,
                                               const bool is_op_reuse_mem, const bool continuous) {
-  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
+  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
+      n == nullptr,
+      REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null");
+      return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null");
   auto node_op_desc = n->GetOpDesc();
-  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
+  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
+      node_op_desc == nullptr,
+      REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
+      return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
   MemoryBlock *block = nullptr;
   NodeIndexIO node_index_io(n, index, kOut);
   int64_t size = 0;
   auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
-  GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr);
+  GE_IF_BOOL_EXEC(
+      output_op_desc == nullptr,
+      REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u",
+                         n->GetName().c_str(), index);
+      GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
+      return nullptr);
   GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
   size_t no_align_size = 0;
-  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
-                                 return nullptr, "Get no align size failed");
+  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
+      GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
+      REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u",
+                        n->GetName().c_str(), index);
+      return nullptr,
+      "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index);
   std::string symbol;
   bool reuse_input = false;
   if (IsSymbolExist(node_index_io, symbol)) {
     block = symbol_blocks_[symbol];
-    GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str());
-                    return nullptr);
+    GE_IF_BOOL_EXEC(block == nullptr,
+                    REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s",
+                                       node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
+                    GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s",
+                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
+                    return nullptr);
     // reduce old size
     size_t align_size = block->Size();
     AlignMemOffset(align_size);
@@ -1335,12 +1406,28 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
     vector<bool> workspace_reuse_flag;
     block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index,
                         workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
+    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
+        block == nullptr,
+        REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u",
+                          n->GetName().c_str(), block_size, index);
+        return nullptr,
+        "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u",
+        n->GetName().c_str(), block_size, index);
   }
-  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr.");
   int out_count = 0;
-  GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr);
+  GE_IF_BOOL_EXEC(
+      index >= n->GetAllOutDataAnchors().size(),
+      REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s",
+                         index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
+      GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s",
+             index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
+      return nullptr);
   auto out_data_anchor = n->GetOutDataAnchor(index);
-  GE_IF_BOOL_EXEC(out_data_anchor == nullptr, GELOGE(FAILED, "Out data anchor is nullptr."); return nullptr);
+  GE_IF_BOOL_EXEC(
+      out_data_anchor == nullptr,
+      REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str());
+      GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str());
+      return nullptr);
   for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
     auto owner_node = in_anchor->GetOwnerNode();
     auto op_desc = owner_node->GetOpDesc();
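The ApplyOutMemory hunks above pack REPORT_INNER_ERROR, GELOGE, and an early return into a single GE_IF_BOOL_EXEC or GE_CHK_BOOL_TRUE_EXEC_WITH_LOG argument, separated by semicolons. That works because the macro splices the argument into an if-body, which is also why forgetting the trailing return would silently change control flow rather than fail to compile. An illustrative sketch, assuming a simplified macro definition rather than the real GE header:

```cpp
#include <cstdio>

// Simplified, assumed shape of GE_IF_BOOL_EXEC; the real definition differs in
// detail but splices `exec_expr` into an if-body the same way, which is what
// lets callers pack several ';'-separated statements plus a `return` into it.
#define GE_IF_BOOL_EXEC(expr, exec_expr) \
  {                                      \
    if (expr) {                          \
      exec_expr;                         \
    }                                    \
  }

const char *LookupBlock(bool found) { return found ? "block" : nullptr; }

const char *ApplyOutMemoryStub(bool found) {
  const char *block = LookupBlock(found);
  // Everything up to the closing parenthesis is one macro argument; dropping
  // the trailing `return nullptr` would still compile, and execution would
  // simply fall through the check with `block` still null.
  GE_IF_BOOL_EXEC(block == nullptr,
                  std::printf("[ERROR][Get][RefBlock] lookup failed\n");
                  return nullptr);
  return block;
}

int main() { return ApplyOutMemoryStub(false) == nullptr ? 0 : 1; }
```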
@@ -1546,8 +1633,14 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
   GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(),
          op_desc->GetOutputsSize(), memorys_type.size());
   if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
-    GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]",
-           op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size());
+    REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s",
+                       ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
+                       op_desc->GetOutputsSize(), op_desc->GetName().c_str());
+    GELOGE(
+        INTERNAL_ERROR,
+        "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
+        ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
+        op_desc->GetOutputsSize(), op_desc->GetName().c_str());
     return INTERNAL_ERROR;
   }
@@ -1673,8 +1766,12 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
            temp.size(), tvm_workspace_memory_type.size());
     if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) {
-      GELOGE(INTERNAL_ERROR, "fusion: node[%s], tvm workspace memory size error![v_temp:%zu, workspace:%zu]",
-             n->GetName().c_str(), temp.size(), tvm_workspace_memory_type.size());
+      REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s",
+                         TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
+                         temp.size(), n->GetName().c_str());
+      GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s",
+             TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
+             temp.size(), n->GetName().c_str());
       return;
     }
     for (size_t i = 0; i < temp.size(); i++) {
@@ -2083,8 +2180,12 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index,
   bool has_workspace_mem_type_attr =
       ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type);
   if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) {
-    GELOGE(INTERNAL_ERROR, "node[%s], workspace_memory size error![index:%zu, workspace:%zu]",
-           node->GetName().c_str(), index, workspace_memory_type.size());
+    REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, "
+                       "index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
+                       index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(),
+                       workspace_memory_type.size(), node->GetName().c_str());
+    GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
+           index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
     return false;
   }
   memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM;
@@ -99,7 +99,8 @@ Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() {
 Status GraphMemoryAssigner::AssignMemory() {
   ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
   if (mem_assigner->Assign() != ge::SUCCESS) {
-    GELOGE(ge::FAILED, "Memory assigner failed");
+    GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s",
+           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return ge::FAILED;
   }
   MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
@@ -115,7 +116,10 @@ Status GraphMemoryAssigner::AssignMemory() {
   auto variable_assigner =
       std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
   if (variable_assigner == nullptr) {
-    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
+    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
+           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return ge::FAILED;
   }
@@ -134,7 +138,10 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
   auto variable_assigner =
       std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
   if (variable_assigner == nullptr) {
-    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
+    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
+           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return ge::FAILED;
   }
   if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
@@ -147,8 +154,10 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
   auto variable_assigner =
       std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
   if (variable_assigner == nullptr) {
-    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
-    return ge::FAILED;
+    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
+           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    return ge::FAILED;  // early return must be kept; falling through would call into a null assigner
   }
   if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) {
     return ge::FAILED;
@@ -161,17 +170,18 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out
                                              int64_t &batch_dim_num, int64_t &out_size) {
   graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
   if (graph_status != GRAPH_SUCCESS) {
-    GELOGE(FAILED, "Opdesc GetSize failed!");
+    GELOGE(FAILED, "[Get][TensorSize]failed from output_desc");
+    REPORT_CALL_ERROR("E19999", "Get tensor size from output_desc failed "
+                      "when CalculateTensorRealSizeAndOutSize");
     return FAILED;
   }
   GeShape output_shape = output_desc->GetShape();
   std::vector<int64_t> output_dims = output_shape.GetDims();
   if (dim_index >= static_cast<int64_t>(output_dims.size())) {
-    std::string error = "Invaild value" + FmtToStr(dim_index) +
-        " of attr _reuse_input_on_dim_index, which is out of data range [0,"
-        + std::to_string(output_dims.size()) + ")";
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s",
+                       dim_index, output_dims.size(), output_shape.ToString().c_str());
+    GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s",
+           dim_index, output_dims.size(), output_shape.ToString().c_str());
     return FAILED;
   }
@@ -187,14 +197,23 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out
   graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
   if (graph_status != GRAPH_SUCCESS) {
-    GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
+    GELOGE(graph_status, "[Calc][TensorSize]");
     return FAILED;
   }
   if (output_mem_size < 0) {
-    std::string error = "After calculating tensor memory size, output_mem_size" + FmtToStr(output_mem_size) +
-        " is out of data range [0," + std::to_string(INT64_MAX) + "]";
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. "
+                       "shape:%s, format:%s, dtype:%s, maybe has dynamic shape",
+                       output_mem_size,
+                       output_shape.ToString().c_str(),
+                       TypeUtils::FormatToSerialString(out_format).c_str(),
+                       TypeUtils::DataTypeToSerialString(data_type).c_str());
+    GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, "
+           "maybe has dynamic shape",
+           output_mem_size,
+           output_shape.ToString().c_str(),
+           TypeUtils::FormatToSerialString(out_format).c_str(),
+           TypeUtils::DataTypeToSerialString(data_type).c_str());
     return FAILED;
   }
@@ -203,7 +222,10 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out
 Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
   if (memory_offset_.empty()) {
-    GELOGE(FAILED, "memory_offset_ is empty.");
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when ReAssignMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return ge::FAILED;
   }
@@ -218,8 +240,10 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size
   auto session_id = compute_graph_->GetSessionID();
   if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
-    GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset,
-           VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
+    GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, "
+           "graph_id:%u, graph_name:%s, reduce your batchsize or scale your model may solve problem",
+           total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(),
+           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     for (auto iter : mem_type_to_offset) {
       ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
                                                       {std::to_string(iter.first), std::to_string(iter.second), "featuremap",
@@ -234,7 +258,13 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size
 Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
   BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
-  GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);
+  if (priority_assigner == nullptr) {
+    REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected when AssignZeroCopyMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, "
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    return ge::FAILED;
+  }
   size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
@@ -254,8 +284,11 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse
   zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
   auto iter = memory_offset_.find(RT_MEMORY_HBM);
   if (iter == memory_offset_.end()) {
-    std::string error = "Memory offset does not have memory type[HBM]";
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
+                       "not expected when AssignZeroCopyMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return FAILED;
   }
   iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
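The same find-or-fail guard on memory_offset_ recurs here and in the atomic-memory hunks further down: every assigner entry point first checks that the RT_MEMORY_HBM slot exists before touching it. A hedged sketch of that lookup on a plain std::map, with hypothetical stand-in types and values (the real MemoryOffset record and RT_MEMORY_HBM constant come from GE and the runtime headers):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>

// Assumed stand-ins for the runtime memory-type constant and the per-type
// offset record the assigner keeps.
constexpr int64_t RT_MEMORY_HBM = 2;
enum Status { SUCCESS = 0, FAILED = 1 };
struct MemoryOffset { std::size_t mem_offset_ = 0; };

// Sketch of the recurring guard: look up the HBM entry, report and fail if
// the map was never seeded with it, otherwise update the offset in place.
Status UpdateHbmOffset(std::map<int64_t, MemoryOffset> &memory_offset, std::size_t new_offset) {
  auto iter = memory_offset.find(RT_MEMORY_HBM);
  if (iter == memory_offset.end()) {
    std::printf("[ERRMGR][E19999] InnerData memory_offset_ does not have type[HBM]\n");
    return FAILED;
  }
  iter->second.mem_offset_ = new_offset;
  return SUCCESS;
}

int main() {
  std::map<int64_t, MemoryOffset> memory_offset{{RT_MEMORY_HBM, MemoryOffset{}}};
  return UpdateHbmOffset(memory_offset, 512) == SUCCESS ? 0 : 1;
}
```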
@@ -304,7 +337,7 @@ uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
   }
   if (continuous_type != 0) {
-    GELOGI("Current node %s continuous type %d.", op_desc->GetName().c_str(), continuous_type);
+    GELOGI("Current node %s continuous type %d", op_desc->GetName().c_str(), continuous_type);
   }
   return continuous_type;
 }
@@ -312,8 +345,9 @@ uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
 Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
                      int64_t &tensor_size, int64_t &nopadding_size) {
   if ((op_desc == nullptr) || (output_desc == nullptr)) {
-    GELOGE(FAILED, "Input para is nullptr.");
-    return FAILED;
+    REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, "
+                       "not expected when GetMemorySize");
+    GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr");
+    return FAILED;  // early return must stay, or the null pointers are dereferenced below
   }
   tensor_size = 0;
   nopadding_size = 0;
@@ -322,7 +356,10 @@ Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &o
     int64_t attr_dim_index;
     bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
     if (!get_attr_dim_flag) {
-      GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
+      REPORT_INNER_ERROR("E19999", "Get Attr:%s failed when GetMemorySize, op_name:%s",
+                         ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
+      GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s",
+             ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
       return FAILED;
     }
@@ -330,17 +367,25 @@ Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &o
     int64_t batch_dim_num = 1;
     if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
         SUCCESS) {
-      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str());
+      REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s",
+                        attr_dim_index, op_desc->GetName().c_str());
+      GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld",
+             op_desc->GetName().c_str(), attr_dim_index);
       return FAILED;
     }
   } else {
     if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
-      GELOGE(FAILED, "GetSize failed.");
+      REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str());
+      GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str());
       return FAILED;
     }
   }
   if ((tensor_size < 0) || (nopadding_size < 0)) {
-    GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "GetMemorySize fail, "
+                       "tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
+                       tensor_size, nopadding_size, op_desc->GetName().c_str());
+    GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
+           tensor_size, nopadding_size, op_desc->GetName().c_str());
     return FAILED;
   }
   return SUCCESS;
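For orientation, the context lines above give GetMemorySize two paths: a nopadding path that reads ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX and recomputes the real size through CalculateTensorRealSizeAndOutSize, and a padded path that takes TensorUtils::GetSize directly; both results are then checked for negative values. A condensed sketch of that control flow, using hypothetical stand-in types in place of the GE tensor descriptors:

```cpp
#include <cstdint>
#include <cstdio>

enum Status { SUCCESS = 0, FAILED = 1 };

// Hypothetical stand-in for the output GeTensorDesc: the real code reads the
// stored size via TensorUtils::GetSize and recomputes the nopadding size via
// CalculateTensorRealSizeAndOutSize.
struct OutputDescStub {
  int64_t stored_size;    // what TensorUtils::GetSize would yield
  int64_t real_dim_size;  // what CalculateTensorRealSizeAndOutSize would yield
};

// Condensed control flow of GetMemorySize: nopadding outputs take the
// recomputed real size, padded outputs take the stored tensor size, and both
// results are range-checked before use.
Status GetMemorySizeStub(const OutputDescStub &desc, bool is_nopadding,
                         int64_t &tensor_size, int64_t &nopadding_size) {
  tensor_size = 0;
  nopadding_size = 0;
  if (is_nopadding) {
    nopadding_size = desc.real_dim_size;  // CalculateTensorRealSizeAndOutSize path
    tensor_size = desc.stored_size;
  } else {
    tensor_size = desc.stored_size;       // TensorUtils::GetSize path
  }
  if ((tensor_size < 0) || (nopadding_size < 0)) {
    std::printf("[ERRMGR][E19999] tensor_size or nopadding_size less than 0, invalid\n");
    return FAILED;
  }
  return SUCCESS;
}

int main() {
  OutputDescStub desc{1024, 256};
  int64_t tensor_size = 0;
  int64_t nopadding_size = 0;
  return GetMemorySizeStub(desc, true, tensor_size, nopadding_size);
}
```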
@@ -374,7 +419,7 @@ bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op
   // If GetBool fail, is_peer_reference is false.
   (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
   GE_IF_BOOL_EXEC(is_peer_reference,
-                  std::string warning = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
+                  std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                       " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                       " is ref. There may be conflict between the two.";
                   GELOGW("%s", warning.c_str());
@@ -404,7 +449,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
     if (continuous_input) {
       if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
         GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
-                          "Assign node %s continuous input memory failed.", node->GetName().c_str())
+                          "[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str())
       } else {
         nodes_stack.push_back(node);
       }
@@ -413,10 +458,11 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
     int64_t memory_type = RT_MEMORY_HBM;
     bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
     if (continuous_output) {
-      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed.");
+      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"),
+                        "[Get][MemType]fail for node:%s", node->GetName().c_str());
       ret = AssignContinuousOutputMemory(node, memory_type, continuous_type);
       if (ret != ge::SUCCESS) {
-        GELOGE(ret, "Assign continuous output memory failed!");
+        GELOGE(ret, "[Assign][Memory:Continuous:Output]fail for node:%s", node->GetName().c_str());
         return ret;
       }
     }
@@ -427,14 +473,16 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
     nodes_stack.pop_back();
     auto iter = node_2_continuous_type.find(node);
     if (iter == node_2_continuous_type.end()) {
-      GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str());
+      REPORT_INNER_ERROR("E19999", "Inner data error when process continuous memory alloc for node:%s, "
+                         "but has no continuous type", node->GetName().c_str());
+      GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str());
       return FAILED;
     }
     GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
-                      "Assign node %s continuous input memory failed.", node->GetName().c_str())
+                      "[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str())
   }
   for (auto pair : memory_offset_) {
-    GELOGD("After reassign continuous memory, memory type = %ld, mem_offset = %zu.", pair.first,
+    GELOGD("After reassign continuous memory, memory type = %ld, mem offset = %zu.", pair.first,
            pair.second.mem_offset_);
   }
   return ge::SUCCESS;
@@ -442,11 +490,13 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
 Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
     int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
-  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
+  GELOGI("Current node %s needs continuous input", node->GetName().c_str());
   auto iter = memory_offset_.find(memory_type);
   if (iter == memory_offset_.end()) {
-    std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
+                       "when assign continuous input memory for node:%s", memory_type, node->GetName().c_str());
+    GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
+           memory_type, node->GetName().c_str());
     return FAILED;
   }
   // The head and tail of hcom continuous input should be added 512
@@ -459,8 +509,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     GE_CHECK_NOTNULL(op_desc);
     vector<int64_t> output_list_this = op_desc->GetOutputOffset();
     if (output_list_this.empty()) {
-      std::string error = "node:" + FmtToStr(op_desc->GetName()) + "has no output offset";
-      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+      REPORT_INNER_ERROR("E19999", "No output offset in node :%s, not expected when assign continuous input memory",
+                         node->GetName().c_str());
+      GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str());
       return FAILED;
     }
     (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
@@ -480,8 +531,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     lx_fusion = lx_fusion && !offsets_of_fusion.empty();
     if (lx_fusion) {
       if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) {
-        std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) +
-            " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
+        std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) +
+            " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
+            " is out of range:" + FmtToStr(offsets_of_fusion.size());
         GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
         return FAILED;
       }
@@ -497,7 +549,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion;
     vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
     if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) {
-      std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
+      std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) +
+          " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
+          " is out of range:" + FmtToStr(output_list.size());
       GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
       return FAILED;
     }
@@ -506,13 +560,13 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
     if (is_allocated_first_input) {
       std::map<int32_t, int32_t> out2ins;
-      GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str());
+      GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "[Get][AllRef]fail for node: %s", node->GetName().c_str());
       // output is beginning offset, set offset for input; only support this case now
       if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
         auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
         output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
         peer_op_desc->SetOutputOffset(output_list);
-        GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld.", node->GetName().c_str(),
+        GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
               out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
               output_list_this.at(out2ins.begin()->first), peer_output_offset);
       } else {
@@ -542,7 +596,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     }
     GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
-           "size[%zu] realsize[%ld] nopadding size[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
+           "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(),
           peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
          output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
          is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
@@ -563,17 +617,32 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
 Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
   auto in_data_anchor_list = node->GetAllInDataAnchors();
   if (in_data_anchor_list.empty()) {
-    GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expect when GetFirstInputPeerOutOutputOffset",
+                       node->GetName().c_str());
+    GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str());
     return FAILED;
   }
   auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
-  GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null.");
+  GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr,
+                  REPORT_INNER_ERROR("E19999", "PeerAnchor is null, "
+                                     "not expect when GetFirstInputPeerOutOutputOffset for node:%s",
+                                     node->GetName().c_str());
+                  GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str());
                   return ge::FAILED);
   auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
-  GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED);
+  GE_IF_BOOL_EXEC(peer_op_desc == nullptr,
+                  REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, "
+                                     "not expect when GetFirstInputPeerOutOutputOffset for node:%s",
+                                     node->GetName().c_str());
+                  GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str());
+                  return ge::FAILED);
   vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
   if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
-    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
+    REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, "
+                       "judge invalid when GetFirstInputPeerOutOutputOffset for node:%s",
+                       peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
+    GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s",
+           peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
     return FAILED;
   }
   mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
@@ -584,11 +653,18 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node,
                                                          uint32_t continuous_type) {
   GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
   auto out_op_desc = node->GetOpDesc();
-  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
+  GE_IF_BOOL_EXEC(out_op_desc == nullptr,
+                  REPORT_INNER_ERROR("E19999", "OpDesc is null, "
+                                     "not expect when AssignContinuousOutputMemory for node:%s",
+                                     node->GetName().c_str());
+                  GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str());
+                  return ge::FAILED);  // keep the early return the old macro call had
   vector<int64_t> output_list = out_op_desc->GetOutputOffset();
   if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
-    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
-           out_op_desc->GetOutputsSize(), output_list.size());
+    REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s, "
+                       "when AssignContinuousOutputMemory",
+                       out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
+    GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
+           out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
     return ge::FAILED;
   }
@@ -647,14 +723,18 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
   map<string, vector<NodePtr>> connecting_output_atomic_nodes;
   Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
   if (status != SUCCESS) {
-    GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
+    GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s",
+           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return status;
   }
   auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
   if (mem_iter == memory_offset_.end()) {
-    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
+                       "not expected when ReAssignAtomicMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return FAILED;
   }
@@ -670,7 +750,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
       vector<int64_t> mem_offset_end;
       status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
       if (status != SUCCESS) {
-        GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
+        GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
               atomic_node->GetName().c_str());
        return status;
      }
@@ -679,7 +759,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
     int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
     if (atomic_mem_size != 0) {
       GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM),
-                        "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
+                        "[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str());
     }
   }
   batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
@@ -690,7 +770,8 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
   for (auto &iter_batch : connecting_output_atomic_nodes) {
     mem_iter->second.mem_offset_ = batch_atomic_mem_start;
     if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
-      GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
+      GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed. "
+             "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
      return FAILED;
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
@@ -721,9 +802,10 @@ Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
         // If GetBool fail, is_reference is false.
         (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
         if (is_reference) {
-          std::string error = "Op" + FmtToStr(peer_in_node_desc->GetName()) +
-              " cannot have both atomic and is_reference attribute.";
-          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+          REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, "
+                             "not support now", peer_in_node_desc->GetName().c_str());
+          GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, "
+                 "not support now", peer_in_node_desc->GetName().c_str());
           return ge::PARAM_INVALID;
         }
@@ -761,7 +843,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP
   // Assign atomic node output memory
   Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
+    GELOGE(ret, "[Assign][Memory:Output:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str());
     return ret;
   }
@@ -781,7 +863,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP
     ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
   }
   if (ret != SUCCESS) {
-    GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
+    GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str());
     return ret;
   }
 } else {
@@ -794,8 +876,11 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &
   auto iter = memory_offset_.find(RT_MEMORY_HBM);
   if (iter == memory_offset_.end()) {
-    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
+                       "not expected when AssignConnectNetOutputAtomicMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return FAILED;
   }
   for (auto &node : connect_netoutput_nodes) {
@@ -811,13 +896,14 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &
            node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
     vector<int64_t> mem_offset_end;
     if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
-      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
+      GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
+             node->GetName().c_str());
       return FAILED;
     }
     // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
     if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) {
-      GELOGE(FAILED, "Failed to set atomic attr separately.");
+      GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str());
       return FAILED;
     }
   }
@@ -842,8 +928,11 @@ Status GraphMemoryAssigner::AssignReferenceMemory() {
     vector<int64_t> output_list = out_op_desc->GetOutputOffset();
     if (out_op_desc->GetOutputsSize() > output_list.size()) {
-      GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
-             out_op_desc->GetOutputsSize(), output_list.size());
+      REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s "
+                         "when AssignReferenceMemory",
+                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
+      GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
+             out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
       return ge::FAILED;
     }
@@ -896,9 +985,12 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
     }
     if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
         (peer_op_desc->GetType() == VARIABLE)) {
-      std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" +
-          FmtToStr(peer_op_desc->GetName()) + " is invalid, Constant/AippData/Variable is not supported";
-      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+      REPORT_INNER_ERROR("E19999", "node(type:%s, name:%s) link to atomic node(name:%s), "
+                         "this situation not supported now",
+                         peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
+      GELOGE(ge::FAILED, "[Check][Link]node(type:%s, name:%s) link to atomic node(name:%s), "
+             "this situation not supported now",
+             peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
       return false;
     }
   }
@@ -918,22 +1010,27 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve
   // Check atomic output
   vector<int64_t> output_list = op_desc->GetOutputOffset();
   if (atomic_output_index.size() > output_list.size()) {
-    std::string error = "Op" + FmtToStr(node->GetName()) +
-        "'s size of atomic_output_index is more than the size of output_list";
+    std::string error =
+        "Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) +
+        " of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list";
     GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
     return ge::FAILED;
   }
   auto output_list_size = static_cast<int64_t>(output_list.size());
   auto iter = memory_offset_.find(RT_MEMORY_HBM);
   if (iter == memory_offset_.end()) {
-    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
+                       "not expected when AssignAtomicOutputMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return FAILED;
   }
   for (auto &output_index : atomic_output_index) {
     if (output_index >= output_list_size) {
-      std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
-          " is more than the size" + FmtToStr(output_list_size) + " of output_list.";
+      std::string error =
+          "Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) +
+          " is more than the size:" + FmtToStr(output_list_size) + " of output_list.";
       GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
       return ge::PARAM_INVALID;
     }
@@ -941,7 +1038,8 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve
     // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
     bool is_assigned_mem = false;
     if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
-      GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
+      GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld",
+             node->GetName().c_str(), output_index);
       return ge::FAILED;
     }
@@ -981,8 +1079,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve
 Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                       bool &is_mem_assigned) {
   if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
-    std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
-        " is more than the size of node's AllOutDataAnchors.";
+    std::string error =
+        "Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) +
+        " is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors.";
     GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
     return ge::PARAM_INVALID;
   }
@@ -1010,8 +1109,11 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc
   GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
   auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
   if (mem_type_iter == memory_offset_.end()) {
-    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
+                       "not expected when AssignOrdinaryAtomicWorkspaceMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return FAILED;
   }
   vector<int64_t> workspace_vector = op_desc->GetWorkspace();
@@ -1032,8 +1134,9 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc
       auto workspace_index = static_cast<uint64_t>(info_iter.first);
       auto workspace_size = info_iter.second;
       if (workspace_index >= workspace_vector.size()) {
-        std::string error = "The workspace index" + FmtToStr(workspace_index) +
-            " is more than the size" + FmtToStr(workspace_vector.size()) + " of workspace vector.";
+        std::string error = "The workspace index:" + FmtToStr(workspace_index) +
+            " is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" +
+            op_desc->GetName().c_str();
         GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
         return ge::PARAM_INVALID;
       }
@@ -1063,8 +1166,11 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt
   GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
   auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
   if (mem_type_iter == memory_offset_.end()) {
-    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
-    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
+                       "not expected when AssignFusionAtomicWorkspaceMemory, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
     return FAILED;
   }
   map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
@@ -1095,7 +1201,10 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt
     sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
   }
   if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
-    GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s when AssignFusionAtomicWorkspaceMemory",
+                       EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
+    GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.",
+           EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
     return FAILED;
   }
@@ -1106,7 +1215,7 @@ Status GraphMemoryAssigner::CheckOffset() {
   std::map<std::string, std::string> anchor_to_symbol;
   std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
   if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
-    GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str());
     return FAILED;
   }
   for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
@@ -1148,7 +1257,6 @@ Status GraphMemoryAssigner::CheckOffset() {
       std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) +
           + " in node" + FmtToStr(node->GetName());
       GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
-      GELOGE(FAILED, "Invalid workspace in node: %s workspace: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
       return FAILED;
     }
   }
@@ -1158,8 +1266,10 @@
 ge::Status GraphMemoryAssigner::SetInputOffset() {
   if (memory_offset_.empty()) {
-    GELOGE(FAILED, "memory_offset_ is empty.");
-    return FAILED;
+    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when SetInputOffset, "
+                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
+    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
+           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
   }
   for (auto pair : memory_offset_) {
     GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
@@ -1168,7 +1278,7 @@ ge::Status GraphMemoryAssigner::SetInputOffset() {
   for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
     if (UpdateOpInputOffset(node) != ge::SUCCESS) {
-      GELOGE(ge::FAILED, "Update op input offset failed");
+      GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str());
       return ge::FAILED;
     }
   }
@@ -1316,12 +1426,12 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
     }
   } else if (node->GetType() == DATA_TYPE) {
     if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
-      GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
+      GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s ", node->GetName().c_str());
       return FAILED;
     }
   } else {
     if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
-      GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
+      GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str());
       return FAILED;
     }
   }
@@ -1361,7 +1471,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in
            peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
     if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
       if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) {
-        GELOGE(FAILED, "Set atomic clean attr failed.");
+        GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
         return FAILED;
       }
     }
@@ -1387,7 +1497,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve
     (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
     mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
     GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
-                     GELOGE(FAILED, "SetListInt failed.");
+                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s",
+                                        ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
+                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
+                            ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                      return FAILED);

     std::vector<int64_t> mem_size_vector;
@@ -1395,7 +1508,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve
     (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
     mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
     GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
-                     GELOGE(FAILED, "SetListInt failed.");
+                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s",
+                                        ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
+                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
+                            ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                      return FAILED);

     std::stringstream ss;
@@ -1437,12 +1553,14 @@ ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nod
   // In the dynamic batch scenario, the memory attributes of nodes are the same.
   for (auto &n : nodes) {
     if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
-      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.")
+      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"),
+                        "[Get][MemType:input]fail for node:%s", n->GetName().c_str())
       break;
     }
     if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
-      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
+      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"),
+                        "[Get][MemType:output]fail for node:%s", n->GetName().c_str())
       break;
     }
   }
@@ -1478,7 +1596,7 @@ ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &
   }
   if (!CheckContinuousMemType(mem_type_list)) {
-    GELOGE(FAILED, "Check continuous memory type failed.");
+    GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str());
     return FAILED;
   }
   // It is continuous memory and memory type is the same, so use the first memory.
@@ -1526,7 +1644,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int3
       if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
         out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
       } else {
-        GELOGE(FAILED, "Invalid reuse_input value %d on output %d of node %s, please check attr reuse_input",
+        REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, "
+                           "please check attr reuse_input",
+                           reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
+        GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, "
+               "please check attr reuse_input",
               reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
         return FAILED;
       }
@@ -1549,7 +1671,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     auto continuous_type = iter->second;
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly",
              input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
       return false;
     }
@@ -1559,7 +1681,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     node_2_continuous_type.emplace(out_node, continuous_type);
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly",
             input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
       return false;
     }
@@ -1575,11 +1697,12 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con
   int64_t mem_clean_size = 0;
   int64_t memory_type = RT_MEMORY_HBM;

-  GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed.");
+  GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"),
+                    "[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str());
   auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
                                          continuous_type, reverse_refresh);
   if (ret != ge::SUCCESS) {
-    GELOGE(ret, "Assign continuous input memory failed!");
+    GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str());
     return ret;
   }
@@ -1590,7 +1713,6 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con
   if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
     // check whether there is an atomic conflict between the current node and the peer out node
     if (!CheckInputIsSupportAtomic(input_continuous_node)) {
-      GELOGE(ge::FAILED, "There is an atomic conflict between the current node and the peer out node, not supported!");
       return ge::FAILED;
     }
@@ -1602,7 +1724,7 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con
       if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
         ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
         if (ret != SUCCESS) {
-          GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
+          GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
           return ret;
         }
       }
@@ -574,6 +574,50 @@ Status ModelBuilder::MergeWeights() {
   return SUCCESS;
 }

+Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) {
+  ge::NodePtr atomic_clean_node = nullptr;
+  atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", atomic_clean_node);
+  if (atomic_clean_node == nullptr) {
+    return SUCCESS;
+  }
+
+  ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc();
+  GE_CHECK_NOTNULL(atomic_op_desc);
+  TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
+  if (tbe_kernel == nullptr) {
+    std::string kernel_name;
+    GeAttrValue::BYTES kernel_buffer;
+    (void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name);
+    (void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer);
+    if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) {
+      GE_CHECK_NOTNULL(kernel_buffer.GetData());
+      std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize());
+      tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data));
+      GE_CHECK_NOTNULL(tbe_kernel);
+    }
+  }
+  if (tbe_kernel == nullptr) {
+    GELOGD("Atomic_clean_node doesn't have tbe_kernel.");
+    return SUCCESS;
+  }
+  tbe_kernel_store_.AddTBEKernel(tbe_kernel);
+  GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str());
+  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName());
+
+  std::string kernel_name;
+  (void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name);
+  (void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name);
+
+  std::string meta_data;
+  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data);
+  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data);
+
+  std::string json_string;
+  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string);
+  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string);
+  return SUCCESS;
+}
+
 Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
   // Add weight
   ge_model.SetWeight(weight_buffer_);
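The new SaveAtomicTBEKernel above resolves the atomic-clean op's kernel in two steps: it prefers the OpKernelBin already attached as an ext attr, and only if that is absent does it rebuild one from the kernel-name string attr plus the raw kernel-buffer bytes attr. A minimal standalone sketch of that resolve-or-rebuild pattern (KernelBin and ResolveKernel are illustrative stand-ins, not GE types):

    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Illustrative stand-in for ge::OpKernelBin.
    struct KernelBin {
      std::string name;
      std::vector<char> data;
    };
    using KernelBinPtr = std::shared_ptr<KernelBin>;

    // Prefer the cached kernel object; otherwise rebuild it from the
    // (name, raw bytes) pair, mirroring the fallback in SaveAtomicTBEKernel.
    KernelBinPtr ResolveKernel(const KernelBinPtr &cached, const std::string &name,
                               std::vector<char> bytes) {
      if (cached != nullptr) {
        return cached;  // fast path: kernel bin already attached to the op
      }
      if (name.empty() || bytes.empty()) {
        return nullptr;  // nothing to rebuild from; caller treats this as "no kernel"
      }
      return std::make_shared<KernelBin>(KernelBin{name, std::move(bytes)});
    }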
@@ -607,6 +651,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
     }
     tbe_name_set.insert(tbe_kernel->GetName());
     tbe_kernel_store_.AddTBEKernel(tbe_kernel);
+
+    GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!");
   }

   SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);
@@ -89,6 +89,8 @@ class ModelBuilder {
   void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
                               std::set<std::string> &aicpu_tf_op_types);

+  Status SaveAtomicTBEKernel(const OpDescPtr &op_desc);
+
   uint64_t session_id_;

   map<int64_t, size_t> mem_type_to_mem_offset_;
@@ -49,6 +49,7 @@ const char *const kIsLastNode = "is_last_node";
 const char *const kIsInputVar = "INPUT_IS_VAR";
 const char *const kIsOutputVar = "OUTPUT_IS_VAR";
 const char *const kProfilingMode = "PROFILING_MODE";
+const char *const kIteratorV2 = "IteratorV2";
 const uint32_t kProfilingArStep = 2;
 const uint64_t kProfilingFpStartLogid = 1;
 const uint64_t kProfilingBpEndLogid = 2;
@@ -57,6 +58,7 @@ const uint64_t kProfilingArEndLogid = 4;
 const uint64_t kProfilingIterEndLogid = 65535;
 const int64_t kHashFactor = 100000;
 const int64_t kInvalidGroupId = -1;
+const std::set<std::string> kFpNodeTypes = {ge::DATA, ge::GETNEXT, kIteratorV2};
 }  // namespace
 namespace ge {
 TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) {
@@ -621,8 +623,10 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
     if (op_kernel_lib_name.empty()) {
       continue;
     }
-    if (op_desc->GetType() == GETNEXT || op_desc->GetType() == DATA) {
+    auto type = op_desc->GetType();
+    std::string original_type;
+    (void)AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, original_type);
+    if (kFpNodeTypes.find(type) != kFpNodeTypes.end() || kFpNodeTypes.find(original_type) != kFpNodeTypes.end()) {
       auto out_anchor = node->GetOutDataAnchor(0);
       for (auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
         GE_CHECK_NOTNULL(peer_in_anchor);
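The AutoFindFpOpIndex change above replaces the hard-coded GETNEXT/DATA comparison with a lookup in kFpNodeTypes, and additionally checks the node's framework original type, so an IteratorV2 wrapped as a framework op is also recognized as a forward-propagation start candidate. A small self-contained sketch of that matching rule (the string literals are assumptions for illustration):

    #include <iostream>
    #include <set>
    #include <string>

    const std::set<std::string> kFpTypes = {"Data", "GetNext", "IteratorV2"};

    // A node qualifies when either its own type or the original framework type
    // recorded on it (for wrapped framework ops) is in the set.
    bool IsFpStart(const std::string &type, const std::string &original_type) {
      return kFpTypes.count(type) != 0 || kFpTypes.count(original_type) != 0;
    }

    int main() {
      std::cout << IsFpStart("FrameworkOp", "IteratorV2") << '\n';  // prints 1
      std::cout << IsFpStart("Conv2D", "") << '\n';                 // prints 0
    }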
@@ -20,9 +20,12 @@
 #include <string>

 #include "graph/load/model_manager/model_manager.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "omm/csa_interact.h"

 namespace ge {
+using Uint32Pair = pair<uint32_t, uint32_t>;
+const uint32_t kInvalidModelId = UINT32_MAX;
 GraphExecutor::GraphExecutor()
     : init_flag_(false),
       train_graph_flag_(false),
@@ -358,7 +361,8 @@ Status GraphExecutor::ExecuteGraph(GraphId graph_id, const GeRootModelPtr &ge_ro
 }

 Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
-                                        const std::vector<InputTensorInfo> &input_tensor) {
+                                        const std::vector<InputTensorInfo> &input_tensor,
+                                        const RunAsyncCallback& callback) {
   GELOGI("[GraphExecutor] Start to async execute graph, graph_id=%u", graph_id);
   if (graph_id != last_graph_id_) {
     auto ret = FreeExecuteMemory();
@@ -368,7 +372,7 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
   }
   last_graph_id_ = graph_id;
   GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED);
-  Status ret = AsyncExecuteModel(ge_root_model->GetModelId(), input_tensor);
+  Status ret = AsyncExecuteModel(ge_root_model, input_tensor, callback);
   if (ret != SUCCESS) {
     GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[GraphExecutor] AsyncExecuteModel Error!");
     return GE_GRAPH_SYNC_MODEL_FAILED;
@@ -378,11 +382,81 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
   return SUCCESS;
 }

-Status GraphExecutor::AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &inputs) {
+bool CompareByLoad(const Uint32Pair &lhs, const Uint32Pair &rhs) {
+  return lhs.second < rhs.second;
+}
+
+uint32_t GraphExecutor::GetExecuteModelId(const GeRootModelPtr &ge_root_model) {
+  std::vector<uint32_t> model_ids = ge_root_model->GetAllModelId();
+  if (model_ids.empty()) {
+    return kInvalidModelId;
+  }
+  if (model_ids.size() == 1) {
+    return ge_root_model->GetModelId();
+  }
+  std::vector<Uint32Pair> model_id_to_loads;
+  auto model_manager = ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  for (auto model_id : model_ids) {
+    auto davinci_model = model_manager->GetModel(model_id);
+    auto hybrid_model = model_manager->GetHybridModel(model_id);
+    if (hybrid_model == nullptr) {
+      GE_CHECK_NOTNULL(davinci_model);
+    }
+    uint32_t input_load = hybrid_model != nullptr ? hybrid_model->GetDataInputerSize() :
+                                                    davinci_model->GetDataInputerSize();
+    uint32_t running_load = hybrid_model != nullptr ? static_cast<uint32_t>(hybrid_model->GetRunningFlag()) :
+                                                      static_cast<uint32_t>(davinci_model->GetRunningFlag());
+    uint32_t load = input_load + running_load;
+    if (load == 0) {
+      return model_id;
+    }
+    model_id_to_loads.emplace_back(model_id, load);
+  }
+  sort(model_id_to_loads.begin(), model_id_to_loads.end(), CompareByLoad);
+  if (model_id_to_loads.empty()) {
+    return kInvalidModelId;
+  }
+  return model_id_to_loads.begin()->first;
+}
+Status GraphExecutor::SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
+                                  const RunAsyncCallback &callback) {
+  auto model_manager = ge::ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  if (model_manager->IsNeedHybridLoad(*ge_root_model)) {
+    auto model = model_manager->GetHybridModel(model_id);
+    GE_CHECK_NOTNULL(model);
+    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
+      GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
+      return FAILED;
+    }
+  } else {
+    auto model = model_manager->GetModel(model_id);
+    GE_CHECK_NOTNULL(model);
+    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
+      GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
+      return FAILED;
+    }
+  }
+  return SUCCESS;
+}
+
+Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &inputs,
+                                        const RunAsyncCallback &callback) {
+  uint32_t model_id = GetExecuteModelId(ge_root_model);
+  if (model_id == kInvalidModelId) {
+    GELOGE(INTERNAL_ERROR, "No valid model id.");
+    return INTERNAL_ERROR;
+  }
   try {
     auto model_manager = ge::ModelManager::GetInstance();
     GE_CHECK_NOTNULL(model_manager);
     GELOGI("RunAsync begin.model_id %u", model_id);
+    if (SetCallback(model_id, ge_root_model, callback) != SUCCESS) {
+      GELOGE(FAILED, "RunAsync: SetCallBack for model fail");
+      return FAILED;
+    }

     Status ret = model_manager->DataInputTensor(model_id, inputs);
     if (ret != SUCCESS) {
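This is the core of the multi-thread online-infer change: a root model may now own several loaded copies, and GetExecuteModelId routes each request to the least-loaded copy, where load is the pending input-queue depth plus one if the copy is currently running (an idle copy short-circuits the search). A standalone sketch of that selection policy, assuming the per-model loads have already been collected:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    constexpr uint32_t kInvalid = UINT32_MAX;  // stand-in for kInvalidModelId

    // Pick the (model_id, load) pair with the smallest load, as GetExecuteModelId
    // does after computing load = queued inputs + running flag for each copy.
    uint32_t PickLeastLoaded(const std::vector<std::pair<uint32_t, uint32_t>> &loads) {
      if (loads.empty()) {
        return kInvalid;
      }
      auto it = std::min_element(
          loads.begin(), loads.end(),
          [](const auto &lhs, const auto &rhs) { return lhs.second < rhs.second; });
      return it->first;
    }

    int main() {
      // Copy 7 has two queued requests and copy 9 has one, so 9 is chosen.
      std::cout << PickLeastLoaded({{7, 2}, {9, 1}, {11, 3}}) << '\n';  // prints 9
    }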
@@ -50,7 +50,7 @@ class GraphExecutor {
                           std::vector<GeTensor> &output_tensor);

   ge::Status ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
-                               const std::vector<InputTensorInfo> &input_tensor);
+                               const std::vector<InputTensorInfo> &input_tensor, const RunAsyncCallback &callback);

   Status SetCondition(std::mutex *mutex, std::condition_variable *cond, std::shared_ptr<GraphModelListener> listener);
@@ -116,6 +116,8 @@ class GraphExecutor {
   static Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);

+  uint32_t GetExecuteModelId(const GeRootModelPtr &ge_root_model);
+
  private:
   Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data,
                           OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc);
@@ -123,7 +125,8 @@ class GraphExecutor {
   Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor,
                           std::vector<GeTensor> &output_tensor);

-  Status AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &input_tensor);
+  Status AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &input_tensor,
+                           const RunAsyncCallback &callback);

   void InitModelIdInfo(std::vector<uint32_t> &out_model_id_info, std::vector<SubGraphInfoPtr> &sub_graph_vec,
                        uint32_t output_size);
@@ -132,6 +135,9 @@ class GraphExecutor {
   Status MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr);

+  static Status SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
+                            const RunAsyncCallback &callback);
+
   bool init_flag_;

   bool train_graph_flag_;
@@ -60,7 +60,6 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge
     GELOGE(GE_GRAPH_PARAM_NULLPTR, "[LoadGraph] GE load graph model_ptr is nullptr.");
     return GE_GRAPH_PARAM_NULLPTR;
   }
-  model_id = ge_root_model_ptr->GetModelId();

   auto model_manager = ModelManager::GetInstance();
   GE_CHECK_NOTNULL(model_manager);
@@ -385,7 +385,7 @@ Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_inf
 Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
   const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
-  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
+  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
   if (output_descs.size() != output_addrs.size()) {
     GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
            inner_dump_info.op->GetName().c_str(), output_descs.size());
@@ -436,7 +436,7 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
   // else data, const or variable op
   aicpu::dump::Output output;
   auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
-  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
+  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
   if (output_tensor == nullptr) {
     GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
            inner_dump_info.op->GetOutputsSize());
@@ -540,7 +540,7 @@ Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info
 Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
   GELOGI("Start dump input");
   const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
-  const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op);
+  const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op);
   if (input_descs.size() != input_addrs.size()) {
     GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
            inner_dump_info.op->GetName().c_str(), input_descs.size());
@@ -36,9 +36,21 @@
 namespace ge {
 class DataDumper {
  public:
-  DataDumper() : runtime_param_{} {}
-  explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {}
+  explicit DataDumper(RuntimeParam *rsh)
+      : model_name_(),
+        model_id_(0),
+        runtime_param_(rsh),
+        dev_mem_load_(nullptr),
+        dev_mem_unload_(nullptr),
+        op_list_(),
+        input_map_(),
+        load_flag_(false),
+        device_id_(0),
+        global_step_(0),
+        loop_per_iter_(0),
+        loop_cond_(0),
+        compute_graph_(nullptr),
+        ref_info_() {}

   ~DataDumper();
@@ -93,10 +105,10 @@ class DataDumper {
   // for inference data dump
   std::string om_name_;

-  uint32_t model_id_ = 0;
-  const RuntimeParam &runtime_param_;
-  void *dev_mem_load_ = nullptr;
-  void *dev_mem_unload_ = nullptr;
+  uint32_t model_id_;
+  RuntimeParam *runtime_param_;
+  void *dev_mem_load_;
+  void *dev_mem_unload_;

   struct InnerDumpInfo;
   struct InnerInputMapping;
@@ -107,12 +119,12 @@ class DataDumper {
   uint32_t end_graph_stream_id_ = 0;
   bool is_end_graph_ = false;
   std::multimap<std::string, InnerInputMapping> input_map_;  // release after DavinciModel::Init
-  bool load_flag_ = false;
-  uint32_t device_id_ = 0;
-  uintptr_t global_step_ = 0;
-  uintptr_t loop_per_iter_ = 0;
-  uintptr_t loop_cond_ = 0;
-  ComputeGraphPtr compute_graph_ = nullptr;  // release after DavinciModel::Init
+  bool load_flag_;
+  uint32_t device_id_;
+  uintptr_t global_step_;
+  uintptr_t loop_per_iter_;
+  uintptr_t loop_cond_;
+  ComputeGraphPtr compute_graph_;  // release after DavinciModel::Init
   std::map<OpDescPtr, void *> ref_info_;  // release after DavinciModel::Init

   void *l1_fusion_addr_ = nullptr;
@@ -134,6 +134,8 @@ class DataInputer {
   ///
   void Stop() { queue_.Stop(); }

+  uint32_t Size() { return queue_.Size(); }
+
  private:
   ///
   /// @ingroup domi_ome
@@ -31,6 +31,7 @@
 #include "common/scope_guard.h"
 #include "common/thread_pool.h"
 #include "framework/common/debug/ge_log.h"
+#include "framework/common/util.h"
 #include "graph/common/ge_call_wrapper.h"
 #include "graph/compute_graph.h"
 #include "graph/debug/ge_attr_define.h"
@@ -184,7 +185,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
       last_execute_mode_(INITIALIZATION),
       session_id_(0),
       device_id_(0),
-      maxDumpOpNum_(0), data_dumper_(runtime_param_),
+      maxDumpOpNum_(0), data_dumper_(&runtime_param_),
       iterator_count_(0),
       is_l1_fusion_enable_(false),
       is_first_execute_(true) {
@@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() {
       GE_CHK_STATUS(task->Release(), "Release task failed.");
     }
   }
+
+  for (auto &item : label_goto_args_) {
+    GE_FREE_RT_LOG(item.second.first);
+  }
+  label_goto_args_.clear();
 }

 Status DavinciModel::Assign(const GeModelPtr &ge_model) {
@@ -654,12 +660,12 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   runtime_param_.graph_id = compute_graph->GetGraphID();

   // op debug register
-  GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed.");
+  GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed");

   GE_TIMESTAMP_START(TransAllVarData);
-  GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed.");
+  GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed");
   GE_TIMESTAMP_END(TransAllVarData, "GraphLoader::TransAllVarData");
-  GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed.");
+  GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed");

   GE_TIMESTAMP_START(InitModelMem);
   GELOGD("Known node is %d.", known_node_);
@@ -667,7 +673,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   if (!known_node_) {
     GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
     data_inputer_ = new (std::nothrow) DataInputer();
-    GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
+    GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr");
   }
   fixed_mem_base_ = reinterpret_cast<uintptr_t>(mem_base_);
   GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem");
@@ -1334,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
   }
 }

+Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) {
+  std::lock_guard<std::mutex> lock(label_args_mutex_);
+  auto it = label_goto_args_.find(label_index);
+  if (it != label_goto_args_.end()) {
+    arg_addr = it->second.first;
+    arg_size = it->second.second;
+    return SUCCESS;
+  }
+
+  if (label_index >= label_list_.size()) {
+    GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size());
+    return INTERNAL_ERROR;
+  }
+  GE_CHECK_NOTNULL(label_list_[label_index]);
+  vector<rtLabel_t> label_used = { label_list_[label_index] };
+
+  arg_size = label_used.size() * sizeof(rtLabelDevInfo);
+  rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+
+  label_goto_args_[label_index] = { arg_addr, arg_size };
+  rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+
+  return SUCCESS;
+}
+
 /// @ingroup ge
 /// @brief LabelSet Op Initialize.
 /// @param [in] op_desc: LabelSet Op descriptor.
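GetLabelGotoAddr above memoizes one device buffer per label index under label_args_mutex_, so repeated label-goto tasks share a single rtMalloc; note the cache entry is recorded before rtLabelListCpy, so even a failed copy leaves the buffer owned by the cache and reclaimed by the loop added to ReleaseTask rather than leaked. The same cache-or-create shape, sketched with host malloc/free standing in for the runtime calls:

    #include <cstdint>
    #include <cstdlib>
    #include <map>
    #include <mutex>
    #include <utility>

    // Cache-or-create pattern from GetLabelGotoAddr, with std::malloc/std::free
    // standing in for rtMalloc and the cleanup loop in ReleaseTask.
    class ArgCache {
     public:
      void *GetOrCreate(uint32_t key, uint32_t size) {
        std::lock_guard<std::mutex> lock(mu_);
        auto it = cache_.find(key);
        if (it != cache_.end()) {
          return it->second.first;  // reuse the buffer allocated earlier
        }
        void *addr = std::malloc(size);
        if (addr != nullptr) {
          cache_[key] = {addr, size};  // ownership stays with the cache
        }
        return addr;
      }

      ~ArgCache() {
        for (auto &item : cache_) {
          std::free(item.second.first);  // mirrors the loop added to ReleaseTask
        }
      }

     private:
      std::mutex mu_;
      std::map<uint32_t, std::pair<void *, uint32_t>> cache_;
    };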
@@ -2547,6 +2586,8 @@ void *DavinciModel::Run(DavinciModel *model) {
   ErrorManager::GetInstance().SetStage(ErrorMessage::kModelExecute, ErrorMessage::kModelExecute);
   while (model->RunFlag()) {
+    // The model has not truly started running before data is received
+    model->SetRunningFlag(false);
     bool rslt_flg = true;
     if (model->GetDataInputer() == nullptr) {
       GELOGW("Data inputer is nullptr.");
@@ -2556,6 +2597,8 @@ void *DavinciModel::Run(DavinciModel *model) {
     std::shared_ptr<InputDataWrapper> data_wrapper;
     Status ret = model->GetDataInputer()->Pop(data_wrapper);
+    // The model run actually starts once data has been received
+    model->SetRunningFlag(true);
     if (data_wrapper == nullptr || ret != SUCCESS) {
       GELOGI("data_wrapper is null!");
       continue;
@@ -2642,7 +2685,9 @@ void *DavinciModel::Run(DavinciModel *model) {
     model->iterator_count_++;
     model->is_first_execute_ = false;
-    GELOGI("run iterator count is %lu", model->iterator_count_);
+    // model run finished
+    model->SetRunningFlag(false);
+    GELOGI("run iterator count is %lu, model_id:%u", model->iterator_count_, model->model_id_);
   }

   CsaInteract::GetInstance().WriteInternalErrorCode();
@@ -2700,7 +2745,7 @@ Status DavinciModel::ModelRunStart() {
   error_context_ = ErrorManager::GetInstance().GetErrorContext();
   CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this);
-  GELOGI("model tread create success, model id:%u.", model_id_);
+  GELOGI("model thread create success, model id:%u.", model_id_);
   return SUCCESS;
 }
@@ -2836,23 +2881,16 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
   GELOGI("DavinciModel::UpdateKnownNodeArgs in");
   GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs),
                     "DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy.");
-  if (!base_addr_not_changed_) {
-    total_io_addrs_.clear();
-    orig_total_io_addrs_.clear();
-    for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
-      auto &task = task_list_[task_index];
-      if (task != nullptr) {
-        Status ret = task->UpdateArgs();
-        if (ret != SUCCESS) {
-          GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
-          return FAILED;
-        }
+  total_io_addrs_.clear();
+  for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
+    auto &task = task_list_[task_index];
+    if (task != nullptr) {
+      Status ret = task->UpdateArgs();
+      if (ret != SUCCESS) {
+        GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
+        return FAILED;
       }
     }
-    // cache latest iterator io addr
-    orig_total_io_addrs_ = total_io_addrs_;
-  } else {
-    total_io_addrs_ = orig_total_io_addrs_;
   }
   GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
@@ -2892,6 +2930,14 @@ Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) {
   return SUCCESS;
 }

+Status DavinciModel::CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const {
+  int64_t value = RT_CAPABILITY_SUPPORT;
+  auto rt_ret = rtGetRtCapability(featureType, featureInfo, &value);
+  GE_CHK_BOOL_RET_STATUS(rt_ret == RT_ERROR_NONE, FAILED, "call rtGetRtCapability failed!");
+  is_support = (value == RT_CAPABILITY_SUPPORT) ? true : false;
+  return SUCCESS;
+}
+
 Status DavinciModel::MallocKnownArgs() {
   GELOGI("DavinciModel::MallocKnownArgs in");
   const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
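CheckCapability is a thin wrapper over rtGetRtCapability, and the MallocKnownArgs hunk below uses it once to decide whether the args, hybrid, and fixed-addr buffers go to RT_MEMORY_TS_4G (when the TS 4G-limited feature is supported) or fall back to RT_MEMORY_HBM. The decision, reduced to a stub (the query here is a placeholder, not the runtime API):

    #include <iostream>

    enum class MemType { HBM, TS_4G };  // illustrative names, not the rt enums

    // Placeholder for CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, ...).
    bool QueryTs4gSupport(bool stubbed_result) { return stubbed_result; }

    // One query decides the memory type for every rtMalloc in MallocKnownArgs.
    MemType ChooseArgsMemType(bool ts_4g_supported) {
      return ts_4g_supported ? MemType::TS_4G : MemType::HBM;
    }

    int main() {
      std::cout << (ChooseArgsMemType(QueryTs4gSupport(true)) == MemType::TS_4G) << '\n';   // prints 1
      std::cout << (ChooseArgsMemType(QueryTs4gSupport(false)) == MemType::HBM) << '\n';    // prints 1
    }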
@@ -2910,20 +2956,22 @@ Status DavinciModel::MallocKnownArgs() {
       return ret;
     }
   }
+  rtError_t rt_ret;
   // malloc args memory
-  if (total_args_size_ == 0) {
-    GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero.");
-    return SUCCESS;
-  }
-  rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
-    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  bool is_support = false;
+  GE_CHK_STATUS_RET_NOLOG(CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, is_support));
+  auto mem_type = is_support ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
+  if (total_args_size_ != 0) {
+    rt_ret = rtMalloc(&args_, total_args_size_, mem_type);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
   }
   // malloc dynamic and static hybrid memory
   if (total_hybrid_args_size_ != 0) {
-    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
+    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, mem_type);
     if (rt_ret != RT_ERROR_NONE) {
       GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
       return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -2932,7 +2980,7 @@
   // malloc fixed addr memory, eg: rts op
   if (total_fixed_addr_size_ != 0) {
     GELOGI("Begin to allocate fixed addr.");
-    rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, RT_MEMORY_HBM);
+    rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, mem_type);
     if (rt_ret != RT_ERROR_NONE) {
       GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
       return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -3025,9 +3073,8 @@ Status DavinciModel::DistributeTask() {
                          task_def.kernel_ex().op_index());
     OpDescPtr op = GetOpByIndex(op_index);
     GE_CHECK_NOTNULL(op);
     if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) {
-      bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo();
+      bool call_dump = OpNeedDump(op->GetName()) && task->CallSaveDumpInfo();
       if (call_dump || is_op_debug_reg_) {
         SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
       }
@@ -3047,11 +3094,16 @@ Status DavinciModel::DistributeTask() {
   return SUCCESS;
 }

-void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
+bool DavinciModel::ModelNeedDump() {
   auto all_dump_model = GetDumpProperties().GetAllDumpModel();
-  bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
-  bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
-  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
+  bool ret = all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
+             all_dump_model.find(dump_model_name_) != all_dump_model.end() ||
+             all_dump_model.find(om_name_) != all_dump_model.end();
+  return ret;
+}
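Note: ModelNeedDump centralizes a three-way lookup (the DUMP_ALL_MODEL wildcard, the dump model name, the om name) that several task-info classes previously inlined; later hunks in this change switch EndGraphTaskInfo::Distribute and KernelTaskInfo::Distribute over to it. A quick illustration of the matching semantics (config values hypothetical):

```cpp
// all_dump_model = {"resnet50_om"}      -> dumps only the model whose
//                                          dump_model_name_ or om_name_ matches
// all_dump_model = {ge::DUMP_ALL_MODEL} -> every loaded model dumps
bool need = model.ModelNeedDump();  // one call replaces the copy-pasted set lookups
```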
+void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
+  if (ModelNeedDump()) {
     GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id);
     data_dumper_.SaveEndGraphId(task_id, stream_id);
   }
@@ -3851,7 +3903,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id)
 }

 void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) {
-  data_dumper_.SetModelName(name_);
+  if (dump_model_name_.empty()) {
+    dump_model_name_ = name_;
+  }
+  data_dumper_.SetModelName(dump_model_name_);
   data_dumper_.SetModelId(model_id_);
   data_dumper_.SetOmName(om_name_);
   data_dumper_.SetComputeGraph(graph);
@@ -4040,7 +4095,7 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
 Status DavinciModel::InitL1DataDumperArgs() {
   auto all_dump_model = GetDumpProperties().GetAllDumpModel();
   bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end();
-  bool find_by_model_name = all_dump_model.find(name_) != all_dump_model.end();
+  bool find_by_model_name = all_dump_model.find(dump_model_name_) != all_dump_model.end();
   bool dump_l1fusion_op =
       (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name;
   if (dump_l1fusion_op) {
@@ -4061,4 +4116,10 @@ Status DavinciModel::InitL1DataDumperArgs() {
   return SUCCESS;
 }
+Status DavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
+  auto listener = dynamic_cast<RunAsyncListener *>(listener_.get());
+  GE_CHECK_NOTNULL(listener);
+  listener->SetCallback(callback);
+  return SUCCESS;
+}
 }  // namespace ge
@@ -221,6 +221,11 @@ class DavinciModel {
   ///
   DataInputer *const GetDataInputer() const { return data_inputer_; }

+  uint32_t GetDataInputerSize() {
+    GE_CHECK_NOTNULL(data_inputer_);
+    return data_inputer_->Size();
+  }
+
   // get Stream number
   uint32_t StreamNum() const { return runtime_param_.stream_num; }
@@ -248,7 +253,10 @@ class DavinciModel {
   string Name() const { return name_; }

   // om_name
-  string OmName() const { return om_name_; }
+  const string &OmName() const { return om_name_; }
+
+  // dump_model_name
+  const string &DumpModelName() const { return dump_model_name_; }

   // version
   uint32_t Version() const { return version_; }
@@ -273,6 +281,8 @@ class DavinciModel {
   const vector<rtLabel_t> &GetLabelList() const { return label_list_; }

+  Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);
+
   Status DestroyThread();

   // get Op
@@ -481,6 +491,12 @@ class DavinciModel {
     data_dumper_.DumpShrink();
   }

+  bool OpNeedDump(const string &op_name) {
+    return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name);
+  }
+
+  bool ModelNeedDump();
+
   void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
   DavinciModel &operator=(const DavinciModel &model) = delete;
@@ -528,11 +544,11 @@ class DavinciModel {
   }

   void SetKnownNode(bool known_node) { known_node_ = known_node; }
   bool IsKnownNode() { return known_node_; }
+  Status CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const;
   Status MallocKnownArgs();
   Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
   Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
   Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
-  void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

   Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
   Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
@@ -540,6 +556,7 @@ class DavinciModel {
   // om file name
   void SetOmName(const string &om_name) { om_name_ = om_name; }
+  void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; }

   void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
   const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
@@ -548,6 +565,10 @@ class DavinciModel {
     return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
   }

+  bool GetRunningFlag() const { return running_flg_; }
+  void SetRunningFlag(bool flag) { running_flg_ = flag; }
+  Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
+
  private:
   // memory address of weights
   uint8_t *weights_mem_base_;
@@ -886,6 +907,7 @@ class DavinciModel {
   // used for inference data dump
   string om_name_;
+  string dump_model_name_;
   uint32_t version_;
   GeModelPtr ge_model_;  // release after DavinciModel::Init
@@ -911,6 +933,8 @@ class DavinciModel {
   shared_ptr<ModelListener> listener_;

   bool run_flg_;
+  // check whether model is running with data
+  bool running_flg_ = false;

   mutex mux_run_flg_;
@@ -930,6 +954,9 @@ class DavinciModel {
   vector<rtLabel_t> label_list_;
   set<uint32_t> label_id_indication_;

+  mutex label_args_mutex_;
+  map<uint32_t, pair<void *, uint32_t>> label_goto_args_;
+
   mutex outside_addrs_mutex_;
   vector<ZeroCopyTask> zero_copy_tasks_;  // Task used Data or NetOutput addr.
   set<const void *> copy_only_addrs_;     // Address need copy to original place.
@@ -1002,8 +1029,6 @@ class DavinciModel {
   map<const void *, void *> known_input_data_info_;
   map<const void *, void *> known_output_data_info_;
   vector<void *> total_io_addrs_;
-  vector<void *> orig_total_io_addrs_;
-  bool base_addr_not_changed_ = false;

   vector<vector<int64_t>> batch_info_;
   vector<vector<int64_t>> combined_batch_info_;
@@ -271,7 +271,7 @@ ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uin
   return SUCCESS;
 }

-ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &model_name,
+ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &om_name,
                                                  const shared_ptr<ge::GeRootModel> &ge_root_model,
                                                  const shared_ptr<ModelListener> &listener) {
   auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model);
@@ -279,13 +279,24 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string
   hybrid_model->SetListener(listener);
   hybrid_model->SetModelId(model_id);
   hybrid_model->SetDeviceId(GetContext().DeviceId());
-  hybrid_model->SetModelName(model_name);
+  hybrid_model->SetOmName(om_name);
   GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id);
   auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release());
   InsertModel(model_id, shared_model);
   return SUCCESS;
 }
+bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) {
+  auto root_graph = ge_root_model.GetRootGraph();
+  if (root_graph == nullptr) {
+    GELOGE(FAILED, "no model on root model");
+    return false;
+  }
+  bool is_shape_unknown = root_graph->GetGraphUnknownFlag();
+  bool is_dsp_partitioned_graph = false;
+  (void)AttrUtils::GetBool(root_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dsp_partitioned_graph);
+  return is_shape_unknown || is_dsp_partitioned_graph || GetContext().GetHostExecFlag();
+}
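Note: IsNeedHybridLoad folds the old CheckIsUnknownShape probe and two additional triggers into one predicate: a root graph flagged shape-unknown, a graph carrying ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, or host-execution mode all route to the hybrid executor. A caller-side sketch, same shape as the LoadModelOnline rewrite below:

```cpp
// Dispatch between the hybrid (dynamic-shape) and the static load paths.
if (IsNeedHybridLoad(*ge_root_model)) {
  return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener);
}
// ...otherwise fall through to the static DavinciModel load.
```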
 ///
 /// @ingroup domi_ome
 /// @brief load model online
@@ -296,14 +307,12 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
   GE_CHK_BOOL_RET_STATUS(listener.get() != nullptr, PARAM_INVALID, "Param incorrect, listener is null");
   if (model_id == INVALID_MODEL_ID) {
     GenModelId(&model_id);
+    GELOGD("Generate new model_id:%u", model_id);
   }
-  bool is_shape_unknown = false;
-  string model_name = "";
-  GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u",
-                    model_id);
-  if (is_shape_unknown || GetContext().GetHostExecFlag()) {
-    return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener);
+  auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
+  string om_name;
+  if (IsNeedHybridLoad(*ge_root_model)) {
+    return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener);
   }

   mmTimespec timespec = mmGetTickCount();
@@ -324,7 +333,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
   auto root_graph = ge_root_model->GetRootGraph();
   GE_CHECK_NOTNULL(root_graph);
   string root_model_name = root_graph->GetName();
-  auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
   GeModelPtr ge_model = name_to_model[root_model_name];
   Status ret = SUCCESS;
   do {
@@ -332,7 +340,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
     GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Assign(ge_model)), GELOGW("assign model to modeldef failed.");
                     break;);
     GE_TIMESTAMP_END(Assign, "GraphLoader::ModelAssign");
+    /// In multi-threaded inference, threads sharing one session_id may cause one another to fail.
+    /// These session_ids all come from the same model, so their values are identical.
+    /// Regenerate the session_id at load time so each loaded instance infers under its own id.
+    if (!ge_root_model->GetTrainFlag()) {
+      uint64_t new_session_id;
+      ret = GenSessionId(new_session_id);
+      GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
+      ret = davinci_model->UpdateSessionId(new_session_id);
+      GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");
+      ge_model->InsertSessionMap(model_id, new_session_id);
+      GELOGD("Update new session id: %lu.", new_session_id);
+    }
     GE_TIMESTAMP_START(Init);
     GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;);
     GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit");
@@ -345,16 +364,16 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
   return ret;
 }
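Note: the session-id block added above relies only on GenSessionId producing a process-unique value per call; its body is not part of this change. An illustrative stand-in (hypothetical implementation, not the real ModelManager::GenSessionId):

```cpp
#include <atomic>
#include <cstdint>

// Any process-wide monotonic counter gives each loaded inference instance
// its own session_id, which is all the comment in the hunk requires.
Status GenSessionIdSketch(uint64_t &session_id) {
  static std::atomic<uint64_t> counter{0};
  session_id = ++counter;
  return SUCCESS;
}
```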
-void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
-  GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
+void ModelManager::InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model) {
+  GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", model_id);
   std::lock_guard<std::recursive_mutex> lock(map_mutex_);
-  model_map_[id] = davinci_model;
+  model_map_[model_id] = davinci_model;
 }

-void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
-  GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
+void ModelManager::InsertModel(uint32_t model_id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
+  GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", model_id);
   std::lock_guard<std::recursive_mutex> lock(map_mutex_);
-  hybrid_model_map_[id] = hybrid_model;
+  hybrid_model_map_[model_id] = hybrid_model;
 }
 Status ModelManager::DeleteModel(uint32_t id) {
@@ -294,6 +294,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
                                           std::vector<InputOutputDims> &output_dims);
   bool IsDynamicShape(uint32_t model_id);
+  bool IsNeedHybridLoad(ge::GeRootModel &ge_root_model);
   ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
   ge::Status EnableExceptionDump(const std::map<string, string> &options);
@@ -329,8 +330,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   /// @ingroup domi_ome
   /// @brief insert new model into model manager set
   ///
-  void InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model);
-  void InsertModel(uint32_t id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);
+  void InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model);
+  void InsertModel(uint32_t model_id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);

   ///
   /// @ingroup domi_ome
@@ -384,7 +384,8 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc
   switch (mem_type) {
     case RT_MEMORY_RDMA_HBM:
       if (offset < 0) {
-        GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset));
+        GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p",
+               reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)));
         return PARAM_INVALID;
       }
       var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));
@@ -45,10 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
 Status EndGraphTaskInfo::Distribute() {
   GELOGI("EndGraphTaskInfo Distribute Start.");
   GE_CHECK_NOTNULL(davinci_model_);
-  auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
-  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
-      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
-      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
+  if (davinci_model_->ModelNeedDump()) {
     GELOGI("Start to call rtEndGraphEx");
     rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
     if (rt_ret != RT_ERROR_NONE) {
@@ -238,8 +238,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
 }

 void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
-  if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
-                                                          op_desc->GetName())) {
+  if (davinci_model_->OpNeedDump(op_desc->GetName())) {
     dump_flag_ = RT_KERNEL_DUMPFLAG;
     dump_args_ = addr;
   }
@@ -124,7 +124,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
       return FAILED;
     }

-    ret = InitTVMTask(args_offset_tmp[0], kernel_def);
+    io_addr_offset_ = args_offset_tmp[0];
+    ret = InitTVMTask(io_addr_offset_, kernel_def);
   } else if (kernel_type_ == ccKernelType::CUSTOMIZED) {
     ret = InitAICPUCustomTask(context.op_index(), kernel_def);
   } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
@@ -380,7 +381,8 @@ Status KernelTaskInfo::Distribute() {
   GELOGD("KernelTaskInfo Distribute Start.");
   if (davinci_model_->IsKnownNode()) {
     if (kernel_type_ == ccKernelType::TE) {
-      args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
+      args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_)
+                            : davinci_model_->GetCurrentArgsAddr(args_offset_);
     } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
       args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
     }
@@ -407,10 +409,7 @@ Status KernelTaskInfo::Distribute() {
          call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_);
   // l1 fusion enable and env flag open (kCloseSkt for skt debug)
   bool open_dump = false;
-  auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
-  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
-      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
-      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
+  if (davinci_model_->ModelNeedDump()) {
     open_dump = true;
   }
   if (call_skt && (env_flag != kCloseSkt) && !open_dump) {
@@ -449,29 +448,41 @@ void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
   }
 }

+Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) {
+  GE_CHECK_NOTNULL(davinci_model_);
+  // copy new io addrs
+  vector<void *> io_addrs = io_addrs_;
+  davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
+  auto addr_size = kAddrLen * io_addrs.size();
+  // copy io addr
+  errno_t sec_ret = memcpy_s(args_addr.get() + offset, addr_size, io_addrs.data(), addr_size);
+  if (sec_ret != EOK) {
+    GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
+    return FAILED;
+  }
+  // copy args to device
+  rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  GELOGD("Copy noncontinuous args success, kernel type %d.", kernel_type_);
+  return SUCCESS;
+}
 Status KernelTaskInfo::UpdateArgs() {
   GELOGI("KernelTaskInfo::UpdateArgs in.");
-  GE_CHECK_NOTNULL(davinci_model_);
   if (kernel_type_ == ccKernelType::TE) {
+    if (l2_buffer_on_) {
+      return CopyNoncontinuousArgs(io_addr_offset_);
+    }
     davinci_model_->SetTotalIOAddrs(io_addrs_);
   } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
-    vector<void *> io_addrs = io_addrs_;
-    davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
-    uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
-    auto addrs_size = sizeof(uint64_t) * io_addrs.size();
-    errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
-    if (sec_ret != EOK) {
-      GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
-      return FAILED;
-    }
-    // copy args to device
-    rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
+    return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead));
   }
-  GELOGI("KernelTaskInfo::UpdateArgs success.");
   return SUCCESS;
 }
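Note: both callers of CopyNoncontinuousArgs pass an offset that skips a fixed prefix of the host-side args image: io_addr_offset_ for TE kernels with an L2 buffer, and sizeof(aicpu::AicpuParamHead) for AICPU kernels, whose buffer starts with the parameter head followed by the io-address table. A sketch of where the copy lands (AICPU case; only the head/table split is asserted here, anything beyond it is illustrative):

```cpp
// Host args image for an AICPU kernel, as CopyNoncontinuousArgs sees it:
// | AicpuParamHead | io addr 0 | io addr 1 | ... |
//   ^ offset 0       ^ offset sizeof(aicpu::AicpuParamHead)
uint8_t *host_args = args_addr.get();
void **io_table = reinterpret_cast<void **>(host_args + sizeof(aicpu::AicpuParamHead));
// io_table[i] receives the refreshed io_addrs[i]; the whole image is then
// rtMemcpy'd to args_ on the device in one shot.
```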
@@ -516,8 +527,8 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
     return SUCCESS;
   }

-  char *sm_contrl = const_cast<char *>(sm_desc.data());
-  rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_contrl);
+  char *sm_control = const_cast<char *>(sm_desc.data());
+  rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_control);

   uint64_t gen_base_addr = davinci_model_->GetRtBaseAddr();
   // There is no weight for te op now. Update L2_mirror_addr by data memory base.
@@ -545,19 +556,31 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
   return SUCCESS;
 }

+void KernelTaskInfo::SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model) {
+  args_offset_ = davinci_model->GetTotalArgsSize();
+  davinci_model->SetTotalArgsSize(args_size);
+}
+
+void KernelTaskInfo::SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model) {
+  hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
+  davinci_model->SetHybridArgsSize(args_size);
+}
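Note: these two setters implement reserve-then-grow bookkeeping: a task records the model's current total as its own offset, then bumps the total by its args size, so the single rtMalloc in MallocKnownArgs later covers every slice (this assumes SetTotalArgsSize/SetHybridArgsSize accumulate, which the offset scheme implies). A small worked example with hypothetical sizes:

```cpp
// Worked example of the reserve-then-grow scheme (sizes hypothetical).
uint32_t total = 0;  // stands in for the model's running args size
auto reserve = [&total](uint32_t args_size) {
  uint32_t offset = total;  // this task's slice starts at the old total
  total += args_size;       // the model-side setter grows the total the same way
  return offset;
};
uint32_t off_a = reserve(64);  // kernel A: offset 0
uint32_t off_b = reserve(32);  // kernel B: offset 64; one 96-byte buffer serves both
```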
 Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
+  GE_CHECK_NOTNULL(davinci_model);
   const domi::KernelDef &kernel_def = task_def.kernel();
   const domi::KernelContext &context = kernel_def.context();
   kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
+  uint32_t args_size = kernel_def.args_size();
   if (kernel_type_ == ccKernelType::TE) {
-    uint32_t args_size = kernel_def.args_size();
-    args_offset_ = davinci_model->GetTotalArgsSize();
-    davinci_model->SetTotalArgsSize(args_size);
-    GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
+    if (kernel_def.sm_desc().empty()) {
+      SetContinuousArgs(args_size, davinci_model);
+      return SUCCESS;
+    }
+    l2_buffer_on_ = true;
+    SetNoncontinuousArgs(args_size, davinci_model);
   } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
-    hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
-    davinci_model->SetHybridArgsSize(kernel_def.args_size());
-    GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
+    SetNoncontinuousArgs(args_size, davinci_model);
   }
   return SUCCESS;
 }
@@ -568,8 +591,23 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
   // get tvm op desc
   OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex);
   GE_CHECK_NOTNULL(op_desc);
+  args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
+  errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
+  if (sec_ret != EOK) {
+    GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
+    return FAILED;
+  }
+  Status ge_ret = UpdateL2Data(kernel_def);
+  // update origin l2 data
+  if (ge_ret != SUCCESS) {
+    return ge_ret;
+  }
   if (davinci_model_->IsKnownNode()) {
-    args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
+    args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_)
+                          : davinci_model_->GetCurrentArgsAddr(args_offset_);
     InitDumpTask(offset);
     return SUCCESS;
   }
@@ -609,12 +647,6 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
-  vector<uint8_t> args_info(args_size_);
-  errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_);
-  if (sec_ret != EOK) {
-    GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
-    return FAILED;
-  }

   if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) {
     GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory.");
@@ -628,7 +660,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
-  sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(),
+  sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(),
                      kAddrLen * tensor_device_addrs.size());
   if (sec_ret != EOK) {
     GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
@@ -640,19 +672,13 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
   GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset,
                              "Op debug is open in TVM task info");

-  Status ge_ret = UpdateL2Data(kernel_def);
-  // update origin l2 data
-  if (ge_ret != SUCCESS) {
-    return ge_ret;
-  }
-
   vector<void *> virtual_io_addrs;  // use virtual address for zero copy key.
   virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
   virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
   if (op_desc->GetType() == ATOMICADDRCLEAN) {
     virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
   }
-  davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset);
+  davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_, offset);

   GELOGD("Do InitTVMTask end");
   return SUCCESS;
@@ -951,8 +977,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
 }

 void KernelTaskInfo::InitDumpTask(uint32_t offset) {
-  if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
-                                                          op_desc_->GetName())) {
+  if (davinci_model_->OpNeedDump(op_desc_->GetName())) {
     if (IsL1FusionOp(op_desc_)) {
       dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG;
     } else {
@@ -129,6 +129,9 @@ class KernelTaskInfo : public TaskInfo {
   bool IsL1FusionOp(const OpDescPtr &op_desc);
   void SetIoAddrs(const OpDescPtr &op_desc);
   void InitDumpTask(uint32_t offset);
+  void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model);
+  void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model);
+  Status CopyNoncontinuousArgs(uint16_t offset);

   // For super kernel
   Status SaveSKTDumpInfo();
@@ -163,6 +166,8 @@ class KernelTaskInfo : public TaskInfo {
   uint32_t hybrid_args_offset_ = 0;
   int64_t fixed_addr_offset_ = 0;
   std::unique_ptr<uint8_t[]> args_addr = nullptr;
+  uint16_t io_addr_offset_ = 0;
+  bool l2_buffer_on_ = false;
   bool call_save_dump_ = false;

   // aicpu ext_info device mem
@@ -17,9 +17,15 @@
 #include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"

 #include "graph/load/model_manager/davinci_model.h"
+#include "graph/debug/ge_attr_define.h"

 namespace ge {
+constexpr uint8_t kGotoBranchMax = 1;
+
+LabelGotoExTaskInfo::~LabelGotoExTaskInfo() {
+  args_ = nullptr;
+  GE_FREE_RT_LOG(index_value_);
+}
+
 Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
   GELOGI("LabelGotoExTaskInfo Init Start.");
   GE_CHECK_NOTNULL(davinci_model);
@@ -28,7 +34,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
     return FAILED;
   }

-  // Get LabelGoto task def
+  // Get LabelGotoEx task def
   const domi::LabelGotoExDef &label_goto = task_def.label_goto_ex();
   OpDescPtr op_desc = davinci_model->GetOpByIndex(label_goto.op_index());
   if (op_desc == nullptr) {
@@ -43,20 +49,38 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
     return INTERNAL_ERROR;
   }

-  const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
-  if (label_index >= label_list.size()) {
-    GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size());
-    return INTERNAL_ERROR;
-  }
-  label_ = label_list[label_index];
-  GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_);
+  rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
+  GELOGI("memory_type: %u", memory_type);
+  GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_));
+  rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  uint64_t branch_index = 0;
+  rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rtMemcpy failed, error: %#x", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+  GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index);
   return SUCCESS;
 }
 Status LabelGotoExTaskInfo::Distribute() {
   GELOGI("LabelGotoExTaskInfo Distribute Start.");
-  rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
+  GE_CHECK_NOTNULL(args_);
+  GE_CHECK_NOTNULL(index_value_);
+  if (args_size_ == 0) {
+    GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", kGotoBranchMax, args_size_);
+    return PARAM_INVALID;
+  }
+
+  rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, kGotoBranchMax, args_, stream_);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
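Note: the rework above retires rtLabelGotoEx; an unconditional goto is now expressed with the same switch-by-index primitive used elsewhere, by pinning the device-side index to 0 and giving the label table exactly one entry (kGotoBranchMax). A conceptual reading, not an rt API listing:

```cpp
// index_value_ -> device uint64_t, memcpy'd to 0 in Init()
// args_        -> label table with a single entry, filled via GetLabelGotoAddr()
// rtLabelSwitchByIndex(index, /*branch_max=*/1, table, stream)
//   => always takes branch table[0], i.e. an unconditional goto.
```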
@@ -14,24 +14,26 @@
  * limitations under the License.
  */

-#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
-#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
+#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
+#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_

 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class LabelGotoExTaskInfo : public TaskInfo {
  public:
-  LabelGotoExTaskInfo() : label_(nullptr) {}
+  LabelGotoExTaskInfo() = default;

-  ~LabelGotoExTaskInfo() override { label_ = nullptr; }
+  ~LabelGotoExTaskInfo() override;

   Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;

   Status Distribute() override;

  private:
-  void *label_;
+  void *index_value_{nullptr};  // switch index input.
+  void *args_{nullptr};         // label info memory.
+  uint32_t args_size_{0};       // label info length.
 };
 }  // namespace ge
-#endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
+#endif  // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
-#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
+#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
+#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_

 #include "graph/load/model_manager/task_info/task_info.h"
@@ -34,4 +34,4 @@ class LabelSetTaskInfo : public TaskInfo {
   void *label_;
 };
 }  // namespace ge
-#endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
+#endif  // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
@@ -16,20 +16,13 @@
 #include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"

-#include "graph/debug/ge_attr_define.h"
 #include "graph/load/model_manager/davinci_model.h"

 namespace ge {
 constexpr uint8_t kLabelSwitchIndexNum = 1;

 LabelSwitchByIndexTaskInfo::~LabelSwitchByIndexTaskInfo() {
-  if (args_ != nullptr) {
-    rtError_t ret = rtFree(args_);
-    if (ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
-    }
-  }
-  args_ = nullptr;
+  GE_FREE_RT_LOG(args_);
   index_value_ = nullptr;
 }
@@ -37,13 +30,12 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
   GELOGI("LabelSwitchByIndexTaskInfo Init Start.");
   GE_CHECK_NOTNULL(davinci_model);

-  const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
   Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList());
   if (ret != SUCCESS) {
     return FAILED;
   }

-  // Get LabelSwitch task def
+  // Get LabelSwitchByIndex task def
   const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index();
   OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index());
   if (op_desc == nullptr) {
@@ -68,7 +60,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
   davinci_model->DisableZeroCopy(index_value_);

-  std::vector<uint32_t> label_idx_list;
+  vector<uint32_t> label_idx_list;
   if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, label_idx_list)) {
     GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(),
            ATTR_NAME_LABEL_SWITCH_LIST.c_str());
@@ -81,7 +73,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
     return INTERNAL_ERROR;
   }

-  label_list_.resize(branch_max_, nullptr);
+  vector<rtLabel_t> label_used(branch_max_, nullptr);
+  const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
   for (size_t idx = 0; idx < label_idx_list.size(); ++idx) {
     uint32_t label_id = label_idx_list[idx];
     if (label_id >= label_list.size()) {
@@ -90,8 +83,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
       return INTERNAL_ERROR;
     }
     GE_CHECK_NOTNULL(label_list[label_id]);
-    label_list_[idx] = label_list[label_id];
+    label_used[idx] = label_list[label_id];
   }

   rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
@@ -103,7 +95,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }

-  rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_);
+  rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -125,7 +117,7 @@ Status LabelSwitchByIndexTaskInfo::Distribute() {
   rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
-    return RT_FAILED;
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
   }

   GELOGI("LabelSwitchByIndexTaskInfo Distribute Success.");
@@ -14,16 +14,15 @@
  * limitations under the License.
  */

-#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
-#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
+#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
+#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_

 #include "graph/load/model_manager/task_info/task_info.h"

 namespace ge {
 class LabelSwitchByIndexTaskInfo : public TaskInfo {
  public:
-  LabelSwitchByIndexTaskInfo()
-      : index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0), fixed_addr_offset_(0) {}
+  LabelSwitchByIndexTaskInfo() = default;

   ~LabelSwitchByIndexTaskInfo() override;
@@ -34,12 +33,11 @@ class LabelSwitchByIndexTaskInfo : public TaskInfo {
   Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;

  private:
-  void *index_value_;    // switch index input.
-  uint32_t branch_max_;  // max branch count.
-  void *args_;           // label info memory.
-  uint32_t args_size_;   // label info length.
-  std::vector<rtLabel_t> label_list_;
-  int64_t fixed_addr_offset_;
+  void *index_value_{nullptr};  // switch index input.
+  uint32_t branch_max_{0};      // max branch count.
+  void *args_{nullptr};         // label info memory.
+  uint32_t args_size_{0};       // label info length.
+  int64_t fixed_addr_offset_{0};
 };
 }  // namespace ge
-#endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
+#endif  // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
@@ -40,7 +40,7 @@ static bool BlockComparator(const Block *left, const Block *right) {
 }

 bool CanMerge(Block *block) {
-  if (block == nullptr || block->allocated || !block->IsSplit()) {
+  if ((block == nullptr) || block->allocated || !block->IsSplit()) {
     return false;
   }
   return true;
@@ -52,7 +52,7 @@ size_t GetBinIndex(size_t size) {
     if (size <= range) {
       break;
     }
-    ++index;
+    index++;
   }
   if (index > kNumBins - 1) {
     index = kNumBins - 1;
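Note: GetBinIndex walks the bin ranges upward and clamps at the last bin. Summarized behavior (the range values below are hypothetical; only the walk-and-clamp shape comes from the hunk):

```cpp
// With hypothetical bin upper bounds {1 MiB, 2 MiB, 4 MiB, ...}:
// GetBinIndex(600 * 1024) -> 0             (first range that fits)
// GetBinIndex(3 << 20)    -> 2
// GetBinIndex(huge)       -> kNumBins - 1  (the explicit clamp above)
```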
@@ -87,15 +87,15 @@ bool ShouldSplit(const Block *block, size_t size) {
 void IncreaseCount(std::map<size_t, size_t> &count, size_t size) {
   auto it = count.find(size);
-  if (it != count.end()) {
-    it->second++;
-  } else {
+  if (it == count.end()) {
     count.emplace(size, 1);
+  } else {
+    it->second++;
   }
 }
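Note: IncreaseCount is a plain size-histogram update used for the allocator's bookkeeping; the reordered branch above is behavior-preserving. Quick usage sketch:

```cpp
std::map<size_t, size_t> malloced;  // block size -> allocation count
IncreaseCount(malloced, 1024);      // malloced[1024] == 1
IncreaseCount(malloced, 1024);      // malloced[1024] == 2
IncreaseCount(malloced, 4096);      // malloced[4096] == 1
```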
 CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) {
-  for (uint32_t i = 0; i < kNumBins; ++i) {
+  for (uint32_t i = 0; i < kNumBins; i++) {
     free_block_bins_[i] = nullptr;
   }
 }
@@ -105,7 +105,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) {
   // when redo Initialize free old memory
   FreeBlocks();
   std::lock_guard<std::recursive_mutex> lock(mutex_);
-  for (uint32_t i = 0; i < kNumBins; ++i) {
+  for (uint32_t i = 0; i < kNumBins; i++) {
     if (free_block_bins_[i] != nullptr) {
       continue;
     }
@@ -132,18 +132,18 @@ void CachingAllocator::Finalize(uint32_t device_id) {
 uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) {
   GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id);
-  uint8_t *ptr = nullptr;
   size = GetBlockSize(size);
+  uint8_t *ptr = nullptr;
   Block *block = FindFreeBlock(size, org_ptr, device_id);
-  if (block != nullptr) {
-    ptr = block->ptr;
-  } else {
+  if (block == nullptr) {
     if (ge::SUCCESS == TryExtendCache(size, device_id)) {
       block = FindFreeBlock(size, org_ptr, device_id);
       if (block != nullptr) {
         ptr = block->ptr;
       }
     }
+  } else {
+    ptr = block->ptr;
   }
   if (ptr == nullptr) {
     GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size);
@@ -171,7 +171,7 @@ Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) {
 }

 void CachingAllocator::FreeBlock(Block *block) {
-  if (block == nullptr || !block->allocated) {
+  if ((block == nullptr) || !block->allocated) {
     return;
   }
   GELOGI("Free block size = %zu", block->size);
@@ -187,7 +187,7 @@ void CachingAllocator::FreeBlock(Block *block) { | |||||
} | } | ||||
void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) { | void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) { | ||||
if (!CanMerge(dst) || !CanMerge(src)) { | |||||
if (!CanMerge(src) || !CanMerge(dst)) { | |||||
return; | return; | ||||
} | } | ||||
@@ -316,7 +316,7 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||||
GELOGI("Free cached blocks"); | GELOGI("Free cached blocks"); | ||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
size_t free_cached_memory_size = 0; | size_t free_cached_memory_size = 0; | ||||
for (uint32_t i = 0; i < kNumBins; ++i) { | |||||
for (uint32_t i = 0; i < kNumBins; i++) { | |||||
auto pool = free_block_bins_[i]; | auto pool = free_block_bins_[i]; | ||||
if (pool == nullptr) { | if (pool == nullptr) { | ||||
continue; | continue; | ||||
@@ -324,7 +324,8 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||||
for (auto it = pool->begin(); it != pool->end();) { | for (auto it = pool->begin(); it != pool->end();) { | ||||
Block *block = *it; | Block *block = *it; | ||||
// free block memory that has not been split | // free block memory that has not been split | ||||
if ((block != nullptr) && (block->ptr != nullptr) && (block->prev == nullptr) && (block->next == nullptr) && | |||||
if ((block != nullptr) && (block->ptr != nullptr) && | |||||
(block->prev == nullptr) && (block->next == nullptr) && | |||||
(memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) { | (memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) { | ||||
auto itcount = malloced_memory_.find(block->size); | auto itcount = malloced_memory_.find(block->size); | ||||
free_cached_memory_size += block->size; | free_cached_memory_size += block->size; | ||||
@@ -345,7 +346,7 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||||
} | } | ||||
void CachingAllocator::FreeBlocks() { | void CachingAllocator::FreeBlocks() { | ||||
GELOGI("Free blocks"); | |||||
GELOGI("Free blocks."); | |||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
// free allocated blocks and put to cache | // free allocated blocks and put to cache | ||||
for (auto &it : allocated_blocks_) { | for (auto &it : allocated_blocks_) { | ||||
@@ -355,10 +356,18 @@ void CachingAllocator::FreeBlocks() { | |||||
(void) FreeCachedBlocks(); | (void) FreeCachedBlocks(); | ||||
} | } | ||||
void CachingAllocator::TryFreeBlocks() { | |||||
GELOGI("Try free blocks."); | |||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | |||||
if (allocated_blocks_.empty()) { | |||||
(void) FreeCachedBlocks(); | |||||
} | |||||
} | |||||
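The new `TryFreeBlocks` only drops the cached pool once no allocated block is outstanding, so a busy allocator keeps its cache warm across runs. A compilable model of that guard, under the assumption that freeing the cache is safe whenever the set of live allocations is empty:

```cpp
#include <cstddef>
#include <mutex>
#include <set>

// Stand-alone sketch (assumed semantics, not GE code) of the TryFreeBlocks guard.
class PoolSketch {
 public:
  void OnAllocated(void *ptr) {
    std::lock_guard<std::mutex> lock(mutex_);
    allocated_.insert(ptr);
  }
  void OnFreed(void *ptr) {
    std::lock_guard<std::mutex> lock(mutex_);
    allocated_.erase(ptr);
  }
  // Returns true when the cache was actually dropped.
  bool TryFreeBlocks() {
    std::lock_guard<std::mutex> lock(mutex_);
    if (!allocated_.empty()) {
      return false;              // something is still in use: keep the cache
    }
    cached_bytes_ = 0;           // stand-in for FreeCachedBlocks()
    return true;
  }

 private:
  std::set<void *> allocated_;
  std::size_t cached_bytes_ = 0;
  std::mutex mutex_;
};

int main() {
  PoolSketch pool;
  int dummy = 0;
  pool.OnAllocated(&dummy);
  bool dropped_while_busy = pool.TryFreeBlocks();  // false
  pool.OnFreed(&dummy);
  bool dropped_when_idle = pool.TryFreeBlocks();   // true
  return (!dropped_while_busy && dropped_when_idle) ? 0 : 1;
}
```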
 void CachingAllocator::FreeBlockBins() {
-  GELOGI("Free block bins");
+  GELOGI("Free block bins.");
   std::lock_guard<std::recursive_mutex> lock(mutex_);
-  for (uint32_t i = 0; i < kNumBins; ++i) {
+  for (uint32_t i = 0; i < kNumBins; i++) {
     if (free_block_bins_[i] != nullptr) {
       delete free_block_bins_[i];
       free_block_bins_[i] = nullptr;
@@ -367,9 +376,9 @@ void CachingAllocator::FreeBlockBins() {
 }
 void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) {
-  GELOGI("%6s total[size:%10zu count:%10zu]", name.c_str(), total_size, total_count);
+  GELOGI("%6s total[size:%10zu count:%10zu].", name.c_str(), total_size, total_count);
   for (auto &it : count) {
-    GELOGI("  |- block[size:%10zu count:%10zu]", it.first, it.second);
+    GELOGI("  |- block[size:%10zu count:%10zu].", it.first, it.second);
   }
 }
@@ -383,20 +392,20 @@ void CachingAllocator::PrintStatics() {
   size_t total_free_count = 0;
   size_t total_malloc_size = 0;
   size_t total_malloc_count = 0;
-  std::map<size_t, size_t> using_block;
-  std::map<size_t, size_t> free_block;
-  std::map<size_t, size_t> malloc_block;
+  std::map<size_t, size_t> using_block_stat;
+  std::map<size_t, size_t> free_block_stat;
+  std::map<size_t, size_t> malloc_block_stat;
   do {
     std::lock_guard<std::recursive_mutex> lock(mutex_);
-    for (uint32_t i = 0; i < kNumBins; ++i) {
+    for (uint32_t i = 0; i < kNumBins; i++) {
       auto pool = free_block_bins_[i];
       if (pool == nullptr) {
         continue;
       }
-      for (auto it = pool->begin(); it != pool->end(); ++it) {
+      for (auto it = pool->begin(); it != pool->end(); it++) {
        if ((*it) != nullptr) {
          total_free_size += (*it)->size;
-          IncreaseCount(free_block, (*it)->size);
+          IncreaseCount(free_block_stat, (*it)->size);
          total_free_count++;
        }
      }
@@ -405,7 +414,7 @@ void CachingAllocator::PrintStatics() {
     for (auto &it : allocated_blocks_) {
       if (it.second != nullptr) {
         total_using_size += it.second->size;
-        IncreaseCount(using_block, it.second->size);
+        IncreaseCount(using_block_stat, it.second->size);
         total_using_count++;
       }
     }
@@ -413,12 +422,12 @@ void CachingAllocator::PrintStatics() {
     for (auto &it : malloced_memory_) {
       total_malloc_size += it.first * it.second;
       total_malloc_count += it.second;
-      malloc_block[it.first] = it.second;
+      malloc_block_stat[it.first] = it.second;
     }
   } while (0);
-  PrintCount(malloc_block, "Malloc", total_malloc_size, total_malloc_count);
-  PrintCount(using_block, "Using", total_using_size, total_using_count);
-  PrintCount(free_block, "Free", total_free_size, total_free_count);
+  PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count);
+  PrintCount(using_block_stat, "Using", total_using_size, total_using_count);
+  PrintCount(free_block_stat, "Free", total_free_size, total_free_count);
 }
 }  // namespace ge
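`PrintStatics` keeps the `do { ... } while (0)` block purely to scope the `lock_guard`: the counters are gathered under the recursive mutex, and the `PrintCount` logging runs only after the lock is released. A minimal sketch of the same snapshot-then-log pattern:

```cpp
#include <cstddef>
#include <iostream>
#include <map>
#include <mutex>

std::mutex g_mutex;
std::map<std::size_t, std::size_t> g_stats;

void PrintSnapshot() {
  std::map<std::size_t, std::size_t> snapshot;
  do {
    std::lock_guard<std::mutex> lock(g_mutex);
    snapshot = g_stats;          // copy while holding the lock
  } while (0);                   // lock released here
  for (const auto &kv : snapshot) {
    std::cout << "block[size:" << kv.first << " count:" << kv.second << "]\n";  // unlocked I/O
  }
}

int main() {
  g_stats[512] = 3;
  g_stats[1024] = 1;
  PrintSnapshot();
  return 0;
}
```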
@@ -94,6 +94,13 @@ class CachingAllocator {
   ///
   Status Free(uint8_t *memory_addr, uint32_t device_id = 0);
+  ///
+  /// @ingroup ge_graph
+  /// @brief try to free memory when no memory is referenced
+  /// @return void
+  ///
+  void TryFreeBlocks();
+
  private:
   ///
@@ -117,6 +117,10 @@ const char *const kAIcoreEngine = "AIcoreEngine";
 const int32_t kDynamicDimsTypeIsGetNext = 0;
 const int32_t kDynamicDimsTypeIsData = 1;
 const char *const kGetNextName = "IteratorV2";
+const uint32_t kInitGraphCount = 1;
+const uint32_t kNotAdded = 0;
+const uint32_t kStartAdd = 1;
+const uint32_t kDoneAdded = 2;
 bool IsTailingOptimization() {
   string is_tailing_optimization_option;
@@ -195,6 +199,8 @@ Status GraphManager::Initialize(const std::map<string, string> &options) {
   graph_map_.clear();
   cache_helper_map_.clear();
+  graph_id_to_add_graph_cond_.clear();
+  graph_count_.clear();
   init_flag_ = true;
   thread_run_flag_ = true;
@@ -204,6 +210,20 @@ Status GraphManager::Initialize(const std::map<string, string> &options) {
   return SUCCESS;
 }
+Status GraphManager::UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id) {
+  Status ret = SUCCESS;
+  for (size_t i = 0; i < ge_root_model->GetAllModelId().size(); ++i) {
+    uint32_t model_id = ge_root_model->GetAllModelId()[i];
+    GELOGI("Unload model %u.", model_id);
+    ret = GraphLoader::UnloadModel(model_id);
+    if (ret != SUCCESS) {
+      GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id);
+      return ret;
+    }
+  }
+  return ret;
+}
 Status GraphManager::Finalize() {
   if (!init_flag_) {
     GELOGW("GraphManager has not been initialized.");
@@ -234,7 +254,6 @@ Status GraphManager::Finalize() {
       unload_model_ret = GE_GRAPH_GRAPH_IS_RUNNING;
       continue;
     }
-
     // unload model
     auto ge_root_model = graph_node->GetGeRootModel();
     if (ge_root_model != nullptr && ge_root_model->GetModelId() != INVALID_MODEL_ID && graph_node->GetLoadFlag()) {
@@ -244,15 +263,14 @@ Status GraphManager::Finalize() {
         unload_model_ret = FAILED;
         continue;
       }
-      ret = GraphLoader::UnloadModel(ge_root_model->GetModelId());
+      ret = UnloadModel(ge_root_model, iter->first);
       if (ret != SUCCESS) {
-        GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), iter->first);
+        GELOGW("[GraphManager] unload model failed, graph_id=%u.", iter->first);
        unload_model_ret = ret;
      }
      rt_ret = rtDeviceReset(GetContext().DeviceId());
      if (rt_ret != RT_ERROR_NONE) {
-        GELOGW("[GraphManager] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
-               iter->first);
+        GELOGW("[GraphManager] rtDeviceReset failed, graphId=%u.", iter->first);
        unload_model_ret = FAILED;
        continue;
      }
@@ -267,6 +285,7 @@ Status GraphManager::Finalize() {
   }
   graph_map_.clear();
   cache_helper_map_.clear();
+  graph_count_.clear();
   // graph context
   if (graph_context_ != nullptr) {
@@ -317,30 +336,59 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) {
   return SUCCESS;
 }
-Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
-                              const std::map<std::string, std::string> &options,
-                              const OmgContext &omg_context) {
-  if (HasGraphNode(graph_id)) {
-    GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id);
-    return GE_GRAPH_GRAPH_ALREADY_EXIST;
-  }
-  auto compute_graph = GraphUtils::GetComputeGraph(graph);
-  if (compute_graph != nullptr) {
-    compute_graph->SetGraphID(graph_id);
-    bool graph_has_been_added = false;
-    if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added)
-        && graph_has_been_added) {
-      GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST,
-             "[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id);
-      return GE_GRAPH_GRAPH_ALREADY_EXIST;
-    }
-    (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true);
-    compute_graph_ = compute_graph;
-  } else {
-    GELOGE(FAILED, "compute graph is null");
-    return FAILED;
-  }
+void GraphManager::SetAddGraphCondition(GraphId graph_id, uint32_t cond) {
+  std::lock_guard<std::mutex> lock(add_graph_cond_mutex_);
+  graph_id_to_add_graph_cond_[graph_id] = cond;
+  GELOGD("Graph [id:%u] has been added.", graph_id);
+}
+uint32_t GraphManager::GetAddGraphCondition(GraphId graph_id) {
+  std::lock_guard<std::mutex> lock(add_graph_cond_mutex_);
+  auto it = graph_id_to_add_graph_cond_.find(graph_id);
+  if (it != graph_id_to_add_graph_cond_.end()) {
+    return it->second;
+  } else {
+    GELOGD("Graph [id:%u] has not been added.", graph_id);
+    return kNotAdded;
+  }
+}
+void GraphManager::RemoveAddGraphCondition(GraphId graph_id) {
+  std::lock_guard<std::mutex> lock(add_graph_cond_mutex_);
+  auto it = graph_id_to_add_graph_cond_.find(graph_id);
+  if (it != graph_id_to_add_graph_cond_.end()) {
+    graph_id_to_add_graph_cond_.erase(it);
+    GELOGD("Successfully removed add_graph_cond of graph [id:%u].", graph_id);
+  } else {
+    GELOGD("Graph [id:%u] has not been added, no need to remove.", graph_id);
+  }
+}
+Status GraphManager::CheckRepeatAdd(uint32_t graph_id, bool &is_added) {
+  uint32_t count = 0;
+  if (GetGraphCount(graph_id, count) != SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id);
+    return INTERNAL_ERROR;
+  }
+  // a previous thread with the same graph_id is in the middle of the AddGraph process
+  if (count > 1 && GetAddGraphCondition(graph_id) == kStartAdd) {
+    std::unique_lock<std::mutex> lock(add_graph_mutex_);
+    GELOGD("Waitting for build end of previous thread.");
+    while (GetAddGraphCondition(graph_id) != kDoneAdded) {
+      add_graph_cv_.wait(lock);
+    }
+    GraphNodePtr graph_node;
+    Status ret = GetGraphNode(graph_id, graph_node);
+    if (ret != SUCCESS) {
+      GELOGE(ret, "[AddGraph] GetGraphNode failed, graph_id = %u.", graph_id);
+      return ret;
+    }
+    is_added = true;
+  }
+  return SUCCESS;
+}
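`CheckRepeatAdd` parks late threads on `add_graph_cv_` until the first thread flips the condition to `kDoneAdded`; the `while` loop around `wait()` guards against spurious wakeups and is equivalent to the predicate overload of `wait`. One detail a reviewer might note in the diff above: the condition is read under `add_graph_cond_mutex_` while the wait uses `add_graph_mutex_`, so correctness leans on the waiter re-checking the condition on every wakeup. A runnable model of the basic handshake, with the tri-state condition reduced to a single bool:

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex mu;
std::condition_variable cv;
bool done_added = false;

void Waiter() {
  std::unique_lock<std::mutex> lock(mu);
  while (!done_added) {  // re-check on every wakeup (spurious or real);
    cv.wait(lock);       // same as cv.wait(lock, []{ return done_added; })
  }
  std::cout << "graph already added, reuse it\n";
}

void Adder() {
  {
    std::lock_guard<std::mutex> lock(mu);
    done_added = true;   // state change under the same mutex as the wait
  }
  cv.notify_all();       // wake every thread blocked in Waiter()
}

int main() {
  std::thread t1(Waiter);
  std::thread t2(Adder);
  t1.join();
  t2.join();
  return 0;
}
```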
+void GraphManager::SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id) {
   std::string session_graph_id;
   if (!AttrUtils::GetStr(*compute_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id) || session_graph_id.empty()) {
     session_graph_id = "-1_" + to_string(graph_id);
@@ -352,17 +400,92 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
     }
     GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]");
   }
+}
+Status GraphManager::NotifyWaittingGraph(uint32_t graph_id) {
+  uint32_t count = 0;
+  if (GetGraphCount(graph_id, count) != SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id);
+    return INTERNAL_ERROR;
+  }
+  GELOGD("Add graph finished, graph_id:%u", graph_id);
+  if (count > 1) {
+    GELOGD("Finish addgraph, graph_id:%u, graph_count:%u, start to notify.", graph_id, count);
+    add_graph_cv_.notify_all();
+  }
+  return SUCCESS;
+}
+Status GraphManager::CreateGraphNode(uint32_t graph_id, const Graph &graph,
+                                     const std::map<std::string, std::string> &options) {
   GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
   GE_IF_BOOL_EXEC(graph_node == nullptr, GELOGE(FAILED, "GraphNode make shared failed");
                   return FAILED);
   std::shared_ptr<Graph> graph_ptr = MakeShared<ge::Graph>(graph);
   GE_IF_BOOL_EXEC(graph_ptr == nullptr, GELOGE(FAILED, "GraphPtr make shared failed");
                   return FAILED);
+  // update options for the tuning graph
+  ParseOption(options, BUILD_MODE, options_.build_mode);
+  ParseOption(options, BUILD_STEP, options_.build_step);
+  ParseOption(options, TUNING_PATH, options_.tuning_path);
   graph_node->SetGraph(graph_ptr);
   graph_node->SetOptions(options);
+  graph_node->IncreaseLoadCount();
   AddGraphNode(graph_id, graph_node);
+  return SUCCESS;
+}
+Status GraphManager::SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options) {
+  CompilerStages &stages = GetCompilerStages(graph_id);
+  stages.preparer.SetOptions(options_);
+  Status status = stages.optimizer.SetOptions(options_);
+  if (status != SUCCESS) {
+    GELOGE(status, "Graph optimizer set options failed.");
+    return status;
+  }
+  stages.builder.SetOptions(options_);
+  return SUCCESS;
+}
+Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
+                              const std::map<std::string, std::string> &options,
+                              const OmgContext &omg_context) {
+  IncreaseGraphCount(graph_id);
+  // validation for adding graphs with the same graph_id in the multi-thread scenario
+  // 1. a previous thread with the same graph_id has finished the AddGraph process
+  if (GetAddGraphCondition(graph_id) == kDoneAdded) {
+    GraphNodePtr graph_node;
+    if (GetGraphNode(graph_id, graph_node) != SUCCESS) {
+      GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "Graph not exist while done adding previously, graph_id = %u.", graph_id);
+      return GE_GRAPH_GRAPH_NOT_EXIST;
+    }
+    graph_node->IncreaseLoadCount();
+    return SUCCESS;
+  }
+  // In the multi-thread scenario, a former thread with the same graph_id may be in the
+  // middle of the AddGraph process; following threads have to wait until the former one
+  // finishes, to avoid repeatedly adding the same graph.
+  bool is_added = false;
+  if (CheckRepeatAdd(graph_id, is_added) != SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "CheckRepeatAdd for graph[id:%u] failed.", graph_id);
+    return INTERNAL_ERROR;
+  }
+  // The former graph (from a different thread) with the same graph id has been successfully added.
+  if (is_added) {
+    return SUCCESS;
+  }
+  // Do add graph
+  SetAddGraphCondition(graph_id, kStartAdd);
+  auto compute_graph = GraphUtils::GetComputeGraph(graph);
+  GE_CHECK_NOTNULL(compute_graph);
+  compute_graph->SetGraphID(graph_id);
+  SetSessionGraphId(compute_graph, graph_id);
+  if (CreateGraphNode(graph_id, graph, options) != SUCCESS) {
+    GELOGE(FAILED, "Failed to create graph_node.");
+    return FAILED;
+  }
   AddLocalOmgContext(graph_id, omg_context);
   if (!options_.output_datatype.empty()) {
@@ -373,16 +496,18 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
     return GRAPH_PARAM_INVALID;
   }
-  CompilerStages &stages = GetCompilerStages(graph_id);
-  stages.preparer.SetOptions(options_);
-  Status status = stages.optimizer.SetOptions(options_);
-  if (status != SUCCESS) {
-    GELOGE(status, "Graph optimizer set options failed.");
-    return status;
+  if (SetStagesOptions(graph_id, options_) != SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Set stage options failed.");
+    return INTERNAL_ERROR;
   }
-  stages.builder.SetOptions(options_);
   var_acc_ctrl_.AddGraph(graph_id, compute_graph);
+  SetAddGraphCondition(graph_id, kDoneAdded);
+  // There may be threads waiting for the same graph to be added
+  if (NotifyWaittingGraph(graph_id) != SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "NotifyWaittingGraph failed.");
+    return INTERNAL_ERROR;
+  }
   return SUCCESS;
 }
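The rewritten `AddGraph` walks a small per-graph-id state machine: count the request, return early if a previous add already completed (`kDoneAdded`), wait if one is in flight (`kStartAdd`), otherwise claim the add and notify waiters when done. A stand-alone model of the claim/finish half of that protocol (`AddTracker` is hypothetical, not a GE type):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <mutex>

enum class AddCond : uint32_t { kNotAdded = 0, kStartAdd = 1, kDoneAdded = 2 };

class AddTracker {
 public:
  // Returns true if this caller has claimed the add and must do the work.
  bool BeginAdd(uint32_t graph_id) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = cond_.find(graph_id);
    if (it != cond_.end() && it->second != AddCond::kNotAdded) {
      return false;  // another thread is adding or has already added
    }
    cond_[graph_id] = AddCond::kStartAdd;
    return true;
  }
  void FinishAdd(uint32_t graph_id) {
    std::lock_guard<std::mutex> lock(mu_);
    cond_[graph_id] = AddCond::kDoneAdded;
    // in the real code this is where waiting threads would be notified
  }

 private:
  std::map<uint32_t, AddCond> cond_;
  std::mutex mu_;
};

int main() {
  AddTracker tracker;
  std::cout << tracker.BeginAdd(1) << "\n";  // 1: first caller claims the add
  std::cout << tracker.BeginAdd(1) << "\n";  // 0: second caller must wait or reuse
  tracker.FinishAdd(1);
  return 0;
}
```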
@@ -433,6 +558,10 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap
     GELOGE(FAILED, "GraphPtr make shared failed");
     return FAILED;
   }
+  // update options for the tuning graph
+  ParseOption(options, BUILD_MODE, options_.build_mode);
+  ParseOption(options, BUILD_STEP, options_.build_step);
+  ParseOption(options, TUNING_PATH, options_.tuning_path);
   graph_node->SetGraph(graph_ptr);
   graph_node->SetOptions(options);
@@ -888,6 +1017,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
   if (!graph_node->IsAsync()) {
     ret = LoadGraph(ge_root_model, graph_node);
   } else {
+    GE_CHECK_NOTNULL(ge_root_model);
     ret = LoadGraphAsync(ge_root_model, graph_node);
   }
   if (ret != SUCCESS) {
@@ -902,6 +1032,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
   if (!graph_node->IsAsync()) {
     ret = LoadGraph(ge_root_model_ptr, graph_node);
   } else {
+    GE_CHECK_NOTNULL(ge_root_model);
     ret = LoadGraphAsync(ge_root_model_ptr, graph_node);
   }
   if (ret != SUCCESS) {
@@ -914,6 +1045,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
 Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) {
   GELOGI("[LoadGraph] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId());
   if (options_.run_graph_flag && ge_root_model != nullptr) {
+    ge_root_model->SetTrainFlag(GetTrainFlag());
     // synchronization run graph with model
     std::shared_ptr<GraphModelListener> model_listener = GetModelListener();
     ModelIdInfo model_id_info;
@@ -1308,54 +1440,29 @@ bool GraphManager::CheckModelLoad(const GeRootModelPtr &ge_root_model, bool load
 }
 Status GraphManager::RemoveGraph(const GraphId &graph_id) {
+  auto it = to_be_deleted_graphs_.find(graph_id);
+  if (it != to_be_deleted_graphs_.end()) {
+    to_be_deleted_graphs_.erase(it);
+  }
   GraphNodePtr graph_node = nullptr;
   Status ret = GetGraphNode(graph_id, graph_node);
-  if (ret != SUCCESS) {
+  if (ret != SUCCESS || graph_node == nullptr) {
+    REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
+                       graph_id, __FUNCTION__);
     GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id);
     return GE_GRAPH_GRAPH_NOT_EXIST;
   }
-  if ((graph_node == nullptr) || (graph_node->GetRunFlag())) {
-    GELOGE(GE_GRAPH_GRAPH_IS_RUNNING, "[GraphManager] Id %u is running, can't be deleted.", graph_id);
-    return GE_GRAPH_GRAPH_IS_RUNNING;
+  if (graph_node->GetRunFlag()) {
+    // only put the graph into the to-be-deleted list in this exceptional scenario
+    to_be_deleted_graphs_.insert(graph_id);
+    GELOGI("[GraphManager] Trying to remove running graph[Id:%u], added into to_be_deleted_graphs_.", graph_id);
+    return SUCCESS;
   }
   std::lock_guard<std::mutex> lock(unload_model_mutex_);
   Status middle_ret;
   rtError_t rt_ret;
-  const std::vector<SubGraphInfoPtr> &all_sub_graph = graph_node->GetAllSubGraph();
-  for (size_t i = 0; i < all_sub_graph.size(); ++i) {
-    // must free buffer firstly
-    middle_ret = all_sub_graph[i]->FreeInOutBuffer();
-    if (middle_ret != SUCCESS) {
-      GELOGE(middle_ret, "[GraphManager] RemoveGraph free mem failed, graph_id=%u.", graph_id);
-      ret = middle_ret;
-    }
-    if (all_sub_graph[i]->GeModelIsValid() && all_sub_graph[i]->GetModelIdInfo().model_id != INVALID_MODEL_ID) {
-      // unload model
-      GELOGI("UnloadModel via new ome.");
-      rt_ret = rtSetDevice(GetContext().DeviceId());
-      if (rt_ret != RT_ERROR_NONE) {
-        GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.",
-               all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
-        ret = FAILED;
-        continue;
-      }
-      middle_ret = GraphLoader::UnloadModel(all_sub_graph[i]->GetModelIdInfo().model_id);
-      if (middle_ret != SUCCESS) {
-        GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.",
-               all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
-        ret = middle_ret;
-      }
-      rt_ret = rtDeviceReset(GetContext().DeviceId());
-      if (rt_ret != RT_ERROR_NONE) {
-        GELOGE(RT_FAILED, "[GraphManager:] unload model failed, modelId=%u, graphId=%u.",
-               all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
-        ret = FAILED;
-      }
-    }
-  }
   var_acc_ctrl_.RemoveGraph(graph_id);
   RemoveGraphNode(graph_id);
@@ -1363,28 +1470,33 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) {
   auto ge_root_model = graph_node->GetGeRootModel();
   if (CheckModelLoad(ge_root_model, graph_node->GetLoadFlag())) {
-    GELOGI("Unload model %u.", ge_root_model->GetModelId());
     rt_ret = rtSetDevice(GetContext().DeviceId());
     if (rt_ret != RT_ERROR_NONE) {
       GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
              graph_id);
       return FAILED;
     }
-    middle_ret = GraphLoader::UnloadModel(ge_root_model->GetModelId());
+    // the same graph may have been added several times, and different models were
+    // created separately, so unload them respectively
+    middle_ret = UnloadModel(ge_root_model, graph_id);
     if (middle_ret != SUCCESS) {
-      GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.", ge_root_model->GetModelId(),
-             graph_id);
+      REPORT_INNER_ERROR("E19999", "UnloadModel for graph:%u failed, check unload detail in GraphLoader %s",
+                         graph_id, __FUNCTION__);
+      GELOGE(middle_ret, "[GraphManager:] unload model failed, graph_id=%u.", graph_id);
       ret = middle_ret;
     }
     rt_ret = rtDeviceReset(GetContext().DeviceId());
     if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
-             graph_id);
+      REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u, when GraphManager %s",
+                        GetContext().DeviceId(), graph_id, __FUNCTION__);
+      GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id);
       ret = FAILED;
     }
   }
   RemoveCompilerStages(graph_id);
+  RemoveGraphCount(graph_id);
+  RemoveAddGraphCondition(graph_id);
   GE_CHK_STATUS_RET(ret, "[GraphManager:] Remove graph failed, graph_id=%u.", graph_id);
   GELOGI("[GraphManager] remove graph success, graph_id=%u.", graph_id);
@@ -1466,6 +1578,10 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
   GE_IF_BOOL_EXEC(ret != SUCCESS,
                   GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.compressFlag value is invalid, must be 0 or 1.");
                   return GE_GRAPH_OPTIONS_INVALID);
+  // Set build mode, build step and tuning path
+  ParseOption(options, BUILD_MODE, options_.build_mode);
+  ParseOption(options, BUILD_STEP, options_.build_step);
+  ParseOption(options, TUNING_PATH, options_.tuning_path);
   // ge.graphType.
   options_.run_graph_flag = true;
@@ -1514,10 +1630,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
   GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d",
          options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type);
-  // Set Build model and step
-  ParseOption(options, BUILD_MODE, options_.build_mode);
-  ParseOption(options, BUILD_STEP, options_.build_step);
   return SUCCESS;
 }
@@ -1549,6 +1661,7 @@ void GraphManager::ParseOption(const std::map<std::string, std::string> &options
                                std::string &option) {
   auto iter = options.find(key);
   if (iter != options.end()) {
+    GELOGD("Set option %s from value %s to value%s", key.c_str(), option.c_str(), iter->second.c_str());
     option = iter->second;
   }
 }
@@ -2401,6 +2514,7 @@ void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_gr
 Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) {
   GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId());
   if (options_.run_graph_flag && ge_root_model != nullptr) {
+    ge_root_model->SetTrainFlag(GetTrainFlag());
     // synchronization run graph with model
     ModelIdInfo model_id_info;
     bool is_unknown_shape = false;
@@ -2417,9 +2531,9 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G
     }
   }
   GE_TIMESTAMP_START(LoadGraph);
-  GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_);
-  Status ret =
-      GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_);
+  auto listener = MakeShared<RunAsyncListener>();
+  GE_CHECK_NOTNULL(listener);
+  Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, listener);
   GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync");
   if (ret != SUCCESS) {
     GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed");
@@ -2433,6 +2547,52 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G
   return SUCCESS;
 }
+void GraphManager::ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node,
+                                 const std::vector<uint32_t> &model_ids, uint32_t graph_id, uint64_t session_id) {
+  rtError_t rt_ret = rtSetDevice(GetContext().DeviceId());
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, when GraphManager %s",
+                      GetContext().DeviceId(), __FUNCTION__);
+    GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, graphId=%u.", graph_id);
+    return;
+  }
+  for (auto model_id : model_ids) {
+    uint64_t max_memory_size = 0;
+    Status result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size);
+    if (result != SUCCESS) {
+      continue;
+    }
+    GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id,
+           max_memory_size);
+    if (model_ids.size() > 1) {
+      result = ge_model->GetSessionId(model_id, session_id);
+      if (result != SUCCESS) {
+        GELOGW("[GraphManager:] get session failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
+               graph_id);
+        continue;
+      }
+    }
+    result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0);
+    if (result != SUCCESS) {
+      GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
+             graph_id);
+    }
+    result = GraphLoader::UnloadModel(model_id);
+    if (result != SUCCESS) {
+      GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id);
+    }
+    GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success.", graph_id, model_id);
+  }
+  graph_node->SetLoadFlag(false);
+  rt_ret = rtDeviceReset(GetContext().DeviceId());
+  if (rt_ret != RT_ERROR_NONE) {
+    REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, when GraphManager %s",
+                      GetContext().DeviceId(), __FUNCTION__);
+    GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id);
+    return;
+  }
+}
 Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) {
   GELOGI("CheckAndReleaseMemory graph_id[%u]", graph_node->GetGraphId());
   int64_t value = 0;
@@ -2476,6 +2636,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
       continue;
     }
     auto model_id = model->GetModelId();
+    auto model_ids = model->GetAllModelId();
     // unload model not release
     bool is_unknown_shape = false;
     GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape));
@@ -2488,34 +2649,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
       GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id);
       continue;
     }
-    uint64_t max_memory_size = 0;
-    result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size);
-    if (result != SUCCESS) {
-      continue;
-    }
-    GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id,
-           max_memory_size);
-    rtError_t rt_ret = rtSetDevice(GetContext().DeviceId());
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id);
-      continue;
-    }
-    result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0);
-    if (result != SUCCESS) {
-      GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
-             graph_id);
-    }
-    result = GraphLoader::UnloadModel(model_id);
-    if (result != SUCCESS) {
-      GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id);
-    }
-    rt_ret = rtDeviceReset(GetContext().DeviceId());
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", model_id, graph_id);
-      continue;
-    }
-    it.second->SetLoadFlag(false);
-    GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success and set LoadFlag to false.", graph_id, model_id);
+    ReleaseMemory(ge_model, it.second, model_ids, graph_id, session_id);
   }
   return SUCCESS;
@@ -2651,6 +2785,38 @@ void GraphManager::ConstructGeInput(const vector<InputTensorInfo> &inputs, vecto
   }
 }
+Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args,
+                                              GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model) {
+  if (!graph_manager->IsGraphNeedBuild(graph_node)) {
+    ge_root_model = graph_node->GetGeRootModel();
+    return SUCCESS;
+  }
+  if (graph_node->GetBuildFlag()) {
+    ReturnError(graph_manager, args.callback, PARAM_INVALID,
+                "The graph " + std::to_string(graph_node->GetGraphId()) +
+                    " need to re-build, you should remove it"
+                    " from GE first, then AddGraph again and rebuild it.");
+    graph_node->Unlock();
+    return PARAM_INVALID;
+  }
+  // check whether an incremental build is needed
+  GeModelPtr ge_model = nullptr;
+  if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) {
+    std::vector<GeTensor> ge_inputs;
+    ConstructGeInput(args.input_tensor, ge_inputs);
+    Status ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id);
+    // release the rts generate context
+    RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId());
+    if (ret != SUCCESS) {
+      ReturnError(graph_manager, args.callback, ret, "PreRun Failed.");
+      return ret;
+    }
+  }
+  graph_node->SetBuildFlag(true);
+  graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId());
+  return SUCCESS;
+}
 void GraphManager::PreRunThread(GraphManager *graph_manager) {
   if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) {
     GELOGW("Set thread name failed.");
@@ -2663,7 +2829,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
       continue;
     }
-    GELOGI("A new loop start.");
+    GELOGI("[PreRunThread] A new loop start, graph_id:%u.", args.graph_id);
     ErrorManager::GetInstance().SetErrorContext(args.error_context);
     ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
@@ -2679,7 +2845,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
                   "[RunGraph] graph not exist, graph_id=" + std::to_string(args.graph_id));
       return;
     }
+    // more than one graph may own the same graph_id
+    uint32_t count = 0;
+    if (graph_manager->GetGraphCount(args.graph_id, count) != SUCCESS) {
+      GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed.", args.graph_id);
+      return;
+    }
+    // Avoid repeated prerun for graphs with the same graph_id in online inference concurrency
+    if (count > 1 && graph_node->GetBuildFlag()) {
+      graph_node->Lock();
+      GELOGD("Avoid repeatively prerun, graph_id:%u.", args.graph_id);
+      // In the online inference concurrency scenario, graph_node is allowed to be locked 'count' times
+      graph_node->SetSemSize(count);
+      graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context,
+                                                 args.input_tensor, graph_node->GetGeRootModel(), GetThreadLocalContext(), args.callback }));
+      GELOGI("[PreRunThread] Loop end. Start to run with cached build model.");
+      continue;
+    }
+    // This cannot be put ahead of the repeated-prerun judgement
     graph_node->Lock();
     if (graph_node->GetRunFlag()) {
@@ -2711,46 +2894,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
     // it will not execute graph preprocess, optimize, partition, build if the graph has been built successfully.
     GELOGI("Start for run graph async.");
     GeRootModelPtr ge_root_model = nullptr;
-    if (graph_manager->IsGraphNeedBuild(graph_node)) {
-      if (graph_node->GetBuildFlag()) {
-        ReturnError(graph_manager, args.callback, PARAM_INVALID,
-                    "The graph " + std::to_string(graph_node->GetGraphId()) +
-                        " need to re-build, you should remove it"
-                        " from GE first, then AddGraph again and rebuild it.");
+    ret = CheckIncreBuildAndPreRun(graph_manager, args, graph_node, ge_root_model);
+    if (ret != SUCCESS) {
+      graph_node->SetRunFlag(false);
+      if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) {
+        ReturnError(graph_manager, args.callback, ret, "CheckIncreBuildAndPreRun Failed, thread exit..");
         graph_node->Unlock();
         return;
+      } else {
+        ReturnError(graph_manager, graph_node, args.callback, ret,
+                    "CheckIncreBuildAndPreRun Failed, keep geop continue!");
+        graph_node->Unlock();
+        continue;
       }
-      // check need incre build.
-      GeModelPtr ge_model = nullptr;
-      if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) {
-        std::vector<GeTensor> ge_inputs;
-        ConstructGeInput(args.input_tensor, ge_inputs);
-        ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id);
-        // release rts generate context
-        RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId());
-        if (ret != SUCCESS) {
-          graph_node->SetRunFlag(false);
-          if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) {
-            ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit..");
-            graph_node->Unlock();
-            return;
-          } else {
-            ReturnError(graph_manager, graph_node, args.callback, ret, "PreRun Failed, keep geop continue!");
-            graph_node->Unlock();
-            continue;
-          }
-        }
-      }
-      graph_node->SetBuildFlag(true);
-      graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId());
-    } else {
-      ge_root_model = graph_node->GetGeRootModel();
     }
     graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context,
                                                args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback }));
-    GELOGI("Loop end.");
+    GELOGI("[PreRunThread] Loop end.");
   }
 }
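The `SetSemSize(count)` call in `PreRunThread` relies on `GraphNode`'s `sem_` behaving like a counting semaphore whose capacity can be raised so that `count` concurrent holders may pass `Lock()`. A self-contained sketch of such a resizable semaphore (assumed semantics; the real `sem_` is a `BlockingQueue<uint8_t>`):

```cpp
#include <condition_variable>
#include <mutex>

class CountingSem {
 public:
  explicit CountingSem(unsigned max) : max_(max) {}

  void SetMaxSize(unsigned max) {
    {
      std::lock_guard<std::mutex> lock(mu_);
      max_ = max;
    }
    cv_.notify_all();  // a larger capacity may unblock waiting lockers
  }
  void Lock() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return held_ < max_; });
    ++held_;
  }
  void Unlock() {
    {
      std::lock_guard<std::mutex> lock(mu_);
      --held_;
    }
    cv_.notify_one();
  }

 private:
  unsigned max_;
  unsigned held_ = 0;
  std::mutex mu_;
  std::condition_variable cv_;
};

int main() {
  CountingSem sem(1);
  sem.Lock();
  sem.SetMaxSize(2);  // now a second holder would be admitted without blocking
  sem.Lock();
  sem.Unlock();
  sem.Unlock();
  return 0;
}
```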
@@ -2847,16 +3008,13 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
       continue;
     }
-    GELOGI("A new loop start.");
+    GELOGI("[RunThread] A new loop start, graph_id:%u.", args.graph_id);
     ErrorManager::GetInstance().SetErrorContext(args.error_context);
     GetContext().SetSessionId(args.session_id);
     GetThreadLocalContext() = args.context;
     graph_manager->UpdateLocalOmgContext(args.graph_id);
-    if (args.graph_node->graph_run_async_listener_ != nullptr) {
-      args.graph_node->graph_run_async_listener_->SetCallback(args.callback);
-    }
     Status ret;
     // parse inputs.dims to vector<vector<uint64_t>> dynamic_dims
     ret = graph_manager->ParseInputsDims(args.input_tensor);
@@ -2866,8 +3024,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
       return;
     }
+    args.graph_node->UpdateLoadFlag();
     if (!args.graph_node->GetLoadFlag()) {
       ErrorManager::GetInstance().SetStage(ErrorMessage::kModelLoad, ErrorMessage::kModelLoad);
+      args.ge_root_model->SetTrainFlag(graph_manager->GetTrainFlag());
       ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node);
       if (ret != SUCCESS || args.ge_root_model == nullptr) {
         StopQueue(graph_manager);
@@ -2875,6 +3035,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
         args.graph_node->Unlock();
         return;
       }
+      // control the number of graph loads in the multi-thread scenario
+      args.graph_node->DecreaseLoadCount();
+      args.graph_node->IncreaseLoadRecord();
       args.graph_node->SetLoadFlag(true);
       GELOGI("LoadGraph[%u], model[%u] success and set LoadFlag to true.", args.graph_node->GetGraphId(),
              args.ge_root_model->GetModelId());
@@ -2890,7 +3054,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
     }
     ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(),
-                                                           args.input_tensor);
+                                                           args.input_tensor, args.callback);
     args.graph_node->SetRunFlag(false);
     if (ret != SUCCESS) {
       ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit.");
@@ -3132,6 +3296,21 @@ Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPar
       non_tuning_subgraphs.push_back(sub_graph_tmp);
     }
   }
+  // for function graphs to tune
+  for (auto &function_graph : compute_graph->GetAllSubgraphs()) {
+    auto subgraph_list = sub_graph_map[function_graph];
+    for (const auto &sub_graph_info_ptr : subgraph_list) {
+      GE_CHECK_NOTNULL(sub_graph_info_ptr);
+      ComputeGraphPtr sub_graph_tmp = sub_graph_info_ptr->GetSubGraph();
+      // needs to be tuned
+      if (sub_graph_info_ptr->GetEngineName() == kVectorEngine ||
+          sub_graph_info_ptr->GetEngineName() == kAIcoreEngine) {
+        tuning_subgraphs.push_back(sub_graph_tmp);
+      } else {
+        non_tuning_subgraphs.push_back(sub_graph_tmp);
+      }
+    }
+  }
   return TuningUtils::ConvertGraphToFile(tuning_subgraphs, non_tuning_subgraphs, exe_flag, path);
 }
@@ -3291,4 +3470,49 @@ void GraphManager::RemoveCompilerStages(GraphId graph_id) {
   std::lock_guard<std::mutex> lock(member_mutex_);
   compiler_stages_.erase(graph_id);
 }
+void GraphManager::IncreaseGraphCount(GraphId graph_id) {
+  std::lock_guard<std::mutex> lock(graph_count_mutex_);
+  auto it = graph_count_.find(graph_id);
+  if (it == graph_count_.end()) {
+    graph_count_.insert({graph_id, kInitGraphCount});
+    GELOGD("After increaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
+  } else {
+    ++graph_count_[graph_id];
+    GELOGD("After increaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
+  }
+}
+void GraphManager::RemoveGraphCount(GraphId graph_id) {
+  std::lock_guard<std::mutex> lock(graph_count_mutex_);
+  auto it = graph_count_.find(graph_id);
+  if (it == graph_count_.end()) {
+    GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id);
+  } else {
+    GELOGD("RemoveGraphCount success, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
+    graph_count_.erase(it);
+  }
+}
+void GraphManager::DecreaseGraphCount(GraphId graph_id) {
+  std::lock_guard<std::mutex> lock(graph_count_mutex_);
+  auto it = graph_count_.find(graph_id);
+  if (it == graph_count_.end()) {
+    GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id);
+  } else {
+    --it->second;
+    GELOGD("After DecreaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
+  }
+}
+Status GraphManager::GetGraphCount(GraphId graph_id, uint32_t &count) {
+  std::lock_guard<std::mutex> lock(graph_count_mutex_);
+  auto it = graph_count_.find(graph_id);
+  if (it == graph_count_.end()) {
+    GELOGW("Graph [id:%u] has not been added.", graph_id);
+    return FAILED;
+  }
+  count = it->second;
+  return SUCCESS;
+}
 }  // namespace ge
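The `graph_count_` helpers above are plain reference counting keyed by graph id, with every path funneled through one mutex. A compact stand-alone equivalent:

```cpp
#include <cstdint>
#include <map>
#include <mutex>

class GraphCounter {
 public:
  void Increase(uint32_t id) {
    std::lock_guard<std::mutex> lock(mu_);
    ++counts_[id];  // a missing entry is value-initialized to 0 first
  }
  void Decrease(uint32_t id) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = counts_.find(id);
    if (it != counts_.end() && it->second > 0) {
      --it->second;
    }
  }
  void Remove(uint32_t id) {
    std::lock_guard<std::mutex> lock(mu_);
    counts_.erase(id);
  }
  bool Get(uint32_t id, uint32_t &count) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = counts_.find(id);
    if (it == counts_.end()) {
      return false;  // the graph was never added (or already removed)
    }
    count = it->second;
    return true;
  }

 private:
  std::map<uint32_t, uint32_t> counts_;
  std::mutex mu_;
};

int main() {
  GraphCounter counter;
  counter.Increase(1);
  counter.Increase(1);
  uint32_t n = 0;
  return (counter.Get(1, n) && n == 2) ? 0 : 1;
}
```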
@@ -184,6 +184,20 @@ class GraphManager {
   Status SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, map<string, Tensor> &var_results);
+  void RemoveGraphCount(GraphId graph_id);
+  void IncreaseGraphCount(GraphId graph_id);
+  void DecreaseGraphCount(GraphId graph_id);
+  Status GetGraphCount(GraphId graph_id, uint32_t &count);
+  void SetAddGraphCondition(GraphId graph_id, uint32_t cond);
+  uint32_t GetAddGraphCondition(GraphId graph_id);
+  void RemoveAddGraphCondition(GraphId graph_id);
+
  private:
   struct CompilerStages {
     GraphPrepare preparer;
@@ -380,6 +394,24 @@ class GraphManager {
   CompilerStages &GetCompilerStages(GraphId graph_id);
   void RemoveCompilerStages(GraphId graph_id);
+  static Status CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, GraphNodePtr &graph_node,
+                                         GeRootModelPtr &ge_root_model);
+  void ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, const std::vector<uint32_t> &model_ids,
+                     uint32_t graph_id, uint64_t session_id);
+  Status CheckRepeatAdd(uint32_t graph_id, bool &is_added);
+  Status NotifyWaittingGraph(uint32_t graph_id);
+  Status CreateGraphNode(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options);
+  Status SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options);
+  Status UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id);
+  void SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id);
   std::atomic_bool thread_run_flag_;
   BlockingQueue<PreRunArgs> prerun_args_q_{};
   BlockingQueue<RunArgs> run_args_q_{};
@@ -415,6 +447,16 @@ class GraphManager {
   std::mutex member_mutex_;
   std::mutex unload_model_mutex_;
+  // avoid repeatedly adding the same graph (with the same graph id)
+  std::mutex add_graph_mutex_;
+  std::mutex add_graph_cond_mutex_;
+  std::condition_variable add_graph_cv_;
+  std::map<GraphId, uint32_t> graph_id_to_add_graph_cond_;
+  // used for the multi-thread online-infer scenario
+  std::set<GraphId> to_be_deleted_graphs_;
+  std::map<GraphId, uint32_t> graph_count_;
+  std::mutex graph_count_mutex_;
 };
 }  // namespace ge
@@ -60,6 +60,15 @@ void GraphNode::Unlock() {
   sem_.Pop(unused);
 }
+void GraphNode::IncreaseLoadCount() {
+  std::unique_lock<std::mutex> lock(load_count_mu_);
+  if (load_record_ == kMaxLoadNum) {
+    GELOGW("Reach the maximum of load_count:%u", kMaxLoadNum);
+    return;
+  }
+  ++load_count_;
+}
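A stand-alone model (assumed semantics) of the load bookkeeping introduced here and in the `GraphNode` header further below: `count_` tracks loads still expected, `record_` the lifetime total, and the `kMaxLoadNum` cap stops a graph from being reloaded without bound.

```cpp
#include <cstdint>
#include <mutex>

class LoadCounterSketch {
 public:
  void IncreaseLoadCount() {
    std::lock_guard<std::mutex> lock(mu_);
    if (record_ == kMaxLoadNum) {
      return;  // cap reached: stop requesting further loads
    }
    ++count_;
  }
  void OnLoaded() {  // mirrors DecreaseLoadCount() + IncreaseLoadRecord()
    std::lock_guard<std::mutex> lock(mu_);
    if (count_ > 0) {
      --count_;
    }
    ++record_;
  }
  // mirrors UpdateLoadFlag(): treat the graph as loaded when nothing is
  // pending or the reload cap has been hit
  bool Loaded() {
    std::lock_guard<std::mutex> lock(mu_);
    return count_ == 0 || record_ >= kMaxLoadNum;
  }

 private:
  static constexpr uint32_t kMaxLoadNum = 8;
  uint32_t count_ = 0;
  uint32_t record_ = 0;
  std::mutex mu_;
};

int main() {
  LoadCounterSketch node;
  node.IncreaseLoadCount();
  bool before = node.Loaded();  // false: one load is pending
  node.OnLoaded();
  bool after = node.Loaded();   // true: nothing pending any more
  return (!before && after) ? 0 : 1;
}
```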
 SubGraphInfo::SubGraphInfo() : subgraph_ptr_(nullptr), ge_model_ptr_(nullptr), malloc_flag_(false) {}
 SubGraphInfo::~SubGraphInfo() {
@@ -55,6 +55,7 @@ using ConstGraphPtr = std::shared_ptr<const ge::Graph>;
 using GraphPtr = std::shared_ptr<ge::Graph>;
 const uint64_t INVALID_SESSION_ID = 0xffffffffffffffffULL;
+const uint32_t kMaxLoadNum = 8;
 struct ModelIdInfo {
   uint32_t model_id{INVALID_MODEL_ID};
@@ -162,6 +163,8 @@ class GraphNode {
   bool GetBuildFlag() const { return build_flag_; }
   void SetBuildFlag(bool buildFlag) { build_flag_ = buildFlag; }
   bool GetLoadFlag() const { return load_flag_; }
+  // allow a graph that owns the same graph id to be loaded repeatedly
+  void UpdateLoadFlag() { load_flag_ = load_count_ == 0 || load_record_ >= kMaxLoadNum; }
   void SetLoadFlag(bool load_flag) { load_flag_ = load_flag; }
   void SetGeModel(const GeModelPtr &ge_model) { ge_model_ = ge_model; }
   GeModelPtr GetGeModel() const { return ge_model_; }
@@ -172,6 +175,13 @@ class GraphNode {
   void Lock();
   void Unlock();
+  void SetSemSize(uint32_t size) { sem_.SetMaxSize(size); }
+  uint32_t GetLoadCount() const { return load_count_; }
+  void IncreaseLoadCount();
+  void DecreaseLoadCount() { --load_count_; }
+  void IncreaseLoadRecord() { ++load_record_; }
   // run graph asynchronous listener
   std::shared_ptr<RunAsyncListener> graph_run_async_listener_;
@@ -184,11 +194,17 @@ class GraphNode {
   GraphPtr graph_;
   ComputeGraphPtr compute_graph_;
   bool build_flag_;
+  // load_flag_ is true if more than one model has been loaded
   bool load_flag_;
   bool async_;
   GeModelPtr ge_model_;
   GeRootModelPtr ge_root_model_;
   BlockingQueue<uint8_t> sem_;
+  // kept consistent with graph_count_ of the same graph_id in GraphManager
+  uint32_t load_count_ = 0;
+  // total number of times a graph with this graph_id has been loaded
+  uint32_t load_record_ = 0;
+  std::mutex load_count_mu_;
 };
 using GraphNodePtr = std::shared_ptr<GraphNode>;
@@ -249,6 +265,7 @@ struct GraphManagerOptions {
   std::string save_original_model;
   std::string build_mode;
   std::string build_step;
+  std::string tuning_path;
   std::string input_shape;
   std::string dynamic_dims;
   int32_t dynamic_node_type = -1;
@@ -275,7 +292,8 @@ struct GraphManagerOptions {
         is_single_op(false),
         save_original_model("false"),
         build_mode(""),
-        build_step("") {}
+        build_step(""),
+        tuning_path("") {}
 };
 } // namespace ge
@@ -347,14 +347,18 @@ ge::Status VarManager::Init(const uint32_t &version, const uint64_t &session_id,
                             const uint64_t &job_id) {
   std::lock_guard<std::recursive_mutex> lock(mutex_);
   GELOGI("VarManager::Init, session id = %lu.", session_id);
-  version_ = version;
-  device_id_ = device_id;
-  session_id_ = session_id;
-  job_id_ = job_id;
-  var_resource_ = std::unique_ptr<VarResource>(new (std::nothrow) VarResource(session_id_));
   if (var_resource_ == nullptr) {
-    GELOGW("VarManager has not been init.");
-    return ge::INTERNAL_ERROR;
+    version_ = version;
+    device_id_ = device_id;
+    session_id_ = session_id;
+    job_id_ = job_id;
+    var_resource_ = std::unique_ptr<VarResource>(new (std::nothrow) VarResource(session_id_));
+    if (var_resource_ == nullptr) {
+      GELOGW("VarManager init failed, session id = %lu.", session_id);
+      return ge::INTERNAL_ERROR;
+    }
+  } else {
+    GELOGW("VarManager has already been initialized, session id = %lu.", session_id);
   }
   return SUCCESS;
 }
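
The reworked Init is idempotent: the member assignments run only on the first call (while var_resource_ is still null), and a repeated call just logs a warning instead of clobbering state. A minimal sketch of the same init-once pattern, with hypothetical names:

```cpp
// Hypothetical init-once pattern, mirroring the control flow above (not GE code).
#include <memory>
#include <mutex>
#include <new>

class Resource {};

class Manager {
 public:
  bool Init() {
    std::lock_guard<std::recursive_mutex> lock(mutex_);
    if (resource_ == nullptr) {  // first call: really initialize
      resource_.reset(new (std::nothrow) Resource());
      if (resource_ == nullptr) {
        return false;  // allocation failed
      }
    }  // later calls: already initialized, fall through and report success
    return true;
  }

 private:
  std::recursive_mutex mutex_;
  std::unique_ptr<Resource> resource_;
};
```
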
@@ -126,11 +126,11 @@ bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int6
 bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) {
   OpDescPtr op_desc = node->GetOpDesc();
-  std::map<string, std::map<int, int>> node_workspace_offset;
+  std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
   bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
   bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
-  node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset);
-  if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) {
+  atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
+  if (!has_atomic_input && has_atomic_output && atomic_workspace_index_size.empty()) {
     std::vector<int64_t> atomic_output_index;
     (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
     bool is_all_output_peer_also_atomic = true;
@@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect
       }
     }
   }
+  return LinkToPotentialPrecedenceNode(graph, clean_addr_node);
+}
+
+// Add control edges from the atomic clean node to every node that may otherwise execute before it. The atomic clean
+// node should run with the highest priority in the entire graph, but because of the stream concurrency mechanism,
+// merely placing it at the head of the graph cannot guarantee that priority. We therefore add control edges from the
+// atomic clean node to the nodes that may be first on each stream. In general, the first nodes on each stream are
+// successors of Data/Variable, and since Data/Variable generates no task and does not execute, we link to the
+// successors of Data/Variable instead.
+Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) {
+  GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.",
+         atomic_clean_node->GetName().c_str());
+  auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor();
+  GE_CHECK_NOTNULL(out_ctrl_anchor);
+  for (const auto &node : graph->GetDirectNode()) {
+    GE_CHECK_NOTNULL(node);
+    bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty();
+    if (!need_handle) {
+      continue;
+    }
+    auto second_nodes = node->GetOutAllNodes();
+    for (const auto &second_node : second_nodes) {
+      GE_CHECK_NOTNULL(second_node);
+      auto in_ctrl_anchor = second_node->GetInControlAnchor();
+      GE_CHECK_NOTNULL(in_ctrl_anchor);
+      if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) {
+        GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor));
+        GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(),
+               second_node->GetName().c_str());
+      }
+    }
+  }
   return SUCCESS;
 }
@@ -332,11 +365,11 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) {
   }
   // 2.Check atomic attr in node
-  std::map<string, std::map<int, int>> node_workspace_offset;
+  std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
   bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
   bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
-  node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset);
-  if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) {
+  atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
+  if (!has_atomic_input && !has_atomic_output && atomic_workspace_index_size.empty()) {
     return false;
   }
@@ -68,6 +68,14 @@ class AtomicAddrCleanPass : public GraphPass {
   Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node);
   /**
+   * Link the atomic clean node to all potential precedence nodes which may execute before it
+   * @param graph
+   * @param atomic_clean_node
+   * @return
+   */
+  Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node);
+  /**
    * Check if this node is atomic op.
    * @param node
    * @return
@@ -137,7 +137,6 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea
     return INTERNAL_ERROR;
   }
   stream_label = node->GetInDataNodes().at(0)->GetName();
-  GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
   bool value = false;
   OpDescPtr op_desc = node->GetOpDesc();
   GE_CHECK_NOTNULL(op_desc);
@@ -30,8 +30,15 @@ constexpr int kMaxRePassTimes = 10000;
 constexpr size_t kMaxOneInNodes = 1000;
 // Each iteration, we take about 0.3k memory on the stack, we should change the recursion to loop later
 constexpr int kMaxRecursiveDepth = 20;
+struct DuringPassNodeSets {
+  std::unordered_set<Node *> nodes_seen;
+  std::unordered_set<NodePtr> nodes_deleted;
+  std::unordered_set<NodePtr> nodes_re_pass;
+  std::unordered_set<NodePtr> nodes_re_pass_immediately;
+  std::unordered_set<NodePtr> nodes_last;
+};
-void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &input_edge_nodes,
+void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque<NodePtr> &input_edge_nodes,
                             std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
   nodes_last.clear();
   for (auto &node : graph->GetDirectNode()) {
@@ -40,7 +47,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
     }
     size_t in_nums = node->GetInNodes().size();
     if (in_nums == 0) {
-      input_edge_nodes.push(node);
+      input_edge_nodes.push_back(node);
       nodes_seen.insert(node.get());
     } else if (in_nums > kMaxOneInNodes) {
       nodes_last.insert(node);
@@ -48,7 +55,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
   }
 }
-void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &nodes_to_pass,
+void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::deque<NodePtr> &nodes_to_pass,
                       std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
   for (auto &node : nodes) {
     if (node == nullptr) {
@@ -60,13 +67,30 @@ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &n
     bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
     if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
-      nodes_to_pass.push(node);
+      nodes_to_pass.push_back(node);
     }
   }
 }
-Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unordered_set<NodePtr> &nodes_re_pass,
-                 std::unordered_set<NodePtr> &nodes_deleted, std::unordered_set<Node *> &nodes_seen) {
+void PushToRePassIfSeen(NodePtr &node, const std::pair<std::string, BaseNodePass *> &name_to_pass,
+                        std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_to_re_pass,
+                        std::unordered_set<NodePtr> &nodes_re_pass) {
+  for (const auto &node_to_re_pass : nodes_to_re_pass) {
+    if (node_to_re_pass == nullptr) {
+      GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
+             node->GetName().c_str(), node->GetType().c_str());
+      continue;
+    }
+    if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
+      GELOGD("The node %s will be re-passed.", node_to_re_pass->GetName().c_str());
+      nodes_re_pass.insert(node_to_re_pass);
+    } else {
+      GELOGD("Not all inputs of node %s have been seen; do not schedule a re-pass this time",
+             node_to_re_pass->GetName().c_str());
+    }
+  }
+}
+Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, DuringPassNodeSets &during_pass_node_set) {
   if (node == nullptr) {
     GELOGE(FAILED, "parameter is null.");
     return FAILED;
@@ -90,22 +114,15 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder
     }
     auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass();
-    for (const auto &node_to_re_pass : nodes_to_re_pass) {
-      if (node_to_re_pass == nullptr) {
-        GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
-               node->GetName().c_str(), node->GetType().c_str());
-        continue;
-      }
-      if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
-        GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str());
-        nodes_re_pass.insert(node_to_re_pass);
-      } else {
-        GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
-      }
-    }
+    PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass,
+                       during_pass_node_set.nodes_re_pass);
+    auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately();
+    PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately,
+                       during_pass_node_set.nodes_re_pass_immediately);
     auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted();
-    nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
+    during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
     if (nodes_deleted_by_pass.count(node) > 0) {
       GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(),
              name_to_pass.first.c_str());
@@ -181,36 +198,33 @@ Status GEPass::Run(const NamesToPass &names_to_passes) {
 Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
   GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size());
-  std::queue<NodePtr> nodes;
-  std::unordered_set<Node *> nodes_seen;
-  std::unordered_set<NodePtr> nodes_deleted;
-  std::unordered_set<NodePtr> nodes_re_pass;
-  std::unordered_set<NodePtr> nodes_last;
-  GetAllNodesNoInputEdge(graph_, nodes, nodes_seen, nodes_last);
+  std::deque<NodePtr> nodes;
+  DuringPassNodeSets during_pass_node_set;
+  GetAllNodesNoInputEdge(graph_, nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);
   GELOGD("Start points count %zu", nodes.size());
   int re_pass_times = 0;
   do {
-    for (auto &node : nodes_re_pass) {
-      nodes.push(node);
-      nodes_seen.insert(node.get());
+    for (auto &node : during_pass_node_set.nodes_re_pass) {
+      nodes.push_back(node);
+      during_pass_node_set.nodes_seen.insert(node.get());
     }
-    nodes_re_pass.clear();
+    during_pass_node_set.nodes_re_pass.clear();
     while (!nodes.empty()) {
       NodePtr node = nodes.front();
-      nodes.pop();
-      (void)nodes_re_pass.erase(node);
+      nodes.pop_front();
+      (void)during_pass_node_set.nodes_re_pass.erase(node);
       GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue);
-      if (nodes_deleted.count(node) > 0) {
+      if (during_pass_node_set.nodes_deleted.count(node) > 0) {
        GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str());
        continue;
      }
-      AddNextIterNodes(node->GetOutNodes(), nodes, nodes_seen, nodes_last);
+      AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);
-      auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
+      auto ret = RunPasses(node, names_to_passes, during_pass_node_set);
       if (ret != SUCCESS) {
         GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
                node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -227,7 +241,7 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
       if (has_sub_graph) {
         GELOGD("There are subgraphs on node %s, run passes for the second time", node->GetName().c_str());
         SetFlagOption(kOptimizeAfterSubGraph, names_to_passes);
-        ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
+        ret = RunPasses(node, names_to_passes, during_pass_node_set);
         if (ret != SUCCESS) {
           GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
                  node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -239,16 +253,21 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
       // should be called each time at the begin of the iteration
       ClearOption(names_to_passes);
     }
+    for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) {
+      GELOGD("The node %s will be re-passed immediately.", node->GetName().c_str());
+      nodes.push_front(node);
+    }
+    during_pass_node_set.nodes_re_pass_immediately.clear();
   }
-  for (auto &node : nodes_last) {
-    bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
-    if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
-      nodes.push(node);
+  for (auto &node : during_pass_node_set.nodes_last) {
+    bool all_in_nodes_seen = node->IsAllInNodesSeen(during_pass_node_set.nodes_seen);
+    if (all_in_nodes_seen && during_pass_node_set.nodes_seen.insert(node.get()).second) {
+      nodes.push_back(node);
     }
   }
-  nodes_last.clear();
-} while ((!nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);
+  during_pass_node_set.nodes_last.clear();
+} while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);
 if (re_pass_times == kMaxRePassTimes) {
   GELOGW("re_pass_times should not come to %d", kMaxRePassTimes);
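
The worklist change from std::queue to std::deque is what enables the immediate re-pass: normal successors are appended with push_back, while nodes a pass flags for immediate re-pass are pushed to the front and handled next. A tiny self-contained illustration of that ordering (toy strings instead of graph nodes):

```cpp
#include <deque>
#include <iostream>
#include <string>

int main() {
  std::deque<std::string> work{"B", "C"};
  work.push_back("D");    // normal node: processed after everything queued so far
  work.push_front("A*");  // immediate re-pass node: processed next
  while (!work.empty()) {
    std::cout << work.front() << ' ';  // prints: A* B C D
    work.pop_front();
  }
  std::cout << '\n';
  return 0;
}
```
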
@@ -53,6 +53,8 @@ class BaseNodePass {
   std::unordered_set<NodePtr> GetNodesNeedRePass() { return nodes_need_re_pass_; }
+  std::unordered_set<NodePtr> GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; }
   std::unordered_set<NodePtr> GetNodesDeleted() { return nodes_deleted_; }
   void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; }
@@ -62,6 +64,7 @@ class BaseNodePass {
   void init() {
     nodes_need_re_pass_.clear();
     nodes_deleted_.clear();
+    nodes_need_re_pass_immediately_.clear();
   }
  protected:
@@ -80,6 +83,14 @@ class BaseNodePass {
   void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); }
   ///
+  /// Add a node to be optimized again immediately. If you add a new node to the graph, or
+  /// change a node's connections, and you want to make sure the node will be
+  /// optimized by the other passes, call this function.
+  /// @param node
+  ///
+  void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); }
+  ///
   /// Add a node and it's input/output data nodes to be optimized again.
   /// @param node
   ///
@@ -109,6 +120,7 @@ class BaseNodePass {
  private:
   std::unordered_set<NodePtr> nodes_need_re_pass_;
+  std::unordered_set<NodePtr> nodes_need_re_pass_immediately_;
   std::unordered_set<NodePtr> nodes_deleted_;
   std::map<NodePassOption, std::string> options_;
 };
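
For illustration, a pass might use the new hook as sketched below. MyRewritePass and its Rewrite helper are invented, and the Status Run(NodePtr &) entry point is assumed from BaseNodePass's usual interface; only the AddImmediateRePassNode call follows the declarations above.

```cpp
// Hypothetical subclass sketch; MyRewritePass and Rewrite() are invented for illustration.
class MyRewritePass : public BaseNodePass {
 public:
  Status Run(NodePtr &node) override {
    NodePtr new_node = Rewrite(node);  // assumed helper that replaces `node` in the graph
    if (new_node != nullptr) {
      // The replacement must be seen by the remaining passes in this very iteration,
      // so request an immediate re-pass instead of a deferred one.
      AddImmediateRePassNode(new_node);
    }
    return SUCCESS;
  }

 private:
  NodePtr Rewrite(NodePtr &node);  // not shown
};
```
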
@@ -25,6 +25,7 @@
 namespace ge {
 Status InferShapePass::Run(NodePtr &node) {
+  // the presence of kOptimizeAfterSubGraph means we are running after subgraph partitioning
   auto ret = ShapeRefiner::InferShapeAndType(node, !OptionExists(kOptimizeAfterSubGraph));
   if (ret != GRAPH_SUCCESS) {
     // select INFERSHAPE failed info
@@ -41,6 +42,20 @@ Status InferShapePass::Run(NodePtr &node) {
     GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str());
     return GE_GRAPH_INFERSHAPE_FAILED;
   }
+  bool need_repass = false;
+  auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), "need_infer_again_", need_repass);
+  if (has_attr) {
+    if (!OptionExists(kOptimizeAfterSubGraph)) {
+      return SUCCESS;
+    }
+    if (need_repass) {
+      AddImmediateRePassNode(node);
+      GELOGD("Node %s needs to be re-passed immediately.", node->GetName().c_str());
+    } else {
+      // clear the attr on While
+      node->GetOpDesc()->DelAttr("need_infer_again_");
+    }
+  }
   return SUCCESS;
 }
 } // namespace ge
@@ -23,7 +23,9 @@ namespace ge {
 Status MergeInputMemcpyPass::Run(ComputeGraphPtr graph) {
   GELOGD("MergeInputMemcpyPass Enter");
   for (const auto &node : graph->GetDirectNode()) {
-    if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
+    std::string type;
+    GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
+    if ((type != MERGE) && (type != REFMERGE)) {
       continue;
     }
     GE_CHECK_NOTNULL(node->GetOpDesc());
@@ -95,4 +97,3 @@ NodePtr MergeInputMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph
   return graph->AddNode(op_desc);
 }
 } // namespace ge
@@ -25,7 +25,9 @@ Status MergeToStreamMergePass::Run(ComputeGraphPtr graph) {
   bypass_nodes_.clear();
   for (const auto &node : graph->GetDirectNode()) {
-    if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
+    std::string type;
+    GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
+    if ((type != MERGE) && (type != REFMERGE)) {
       continue;
     }
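
Both hunks above replace GetType() with GetOriginalType() when matching Merge/RefMerge: a node that has been wrapped (for example as a framework op) can report a current type different from the type it was originally built from, and GetOriginalType recovers the latter. A hedged sketch of the repeated pattern as a helper, assuming GE's GetOriginalType(const NodePtr &, std::string &) and the MERGE/REFMERGE constants seen in the diff:

```cpp
// Sketch only: wraps the match-by-original-type pattern repeated in the hunks above.
bool IsMergeLikeNode(const NodePtr &node, Status &status) {
  std::string type;
  status = GetOriginalType(node, type);  // recovers the pre-wrapping type
  if (status != SUCCESS) {
    GELOGE(status, "Get original type failed for node %s.", node->GetName().c_str());
    return false;
  }
  return (type == MERGE) || (type == REFMERGE);
}
```
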
@@ -555,6 +555,8 @@ void NetOutputPass::AddInOutForNetOutputOp(const ComputeGraphPtr &graph, OpDescP
       return;
     }
     ge::GeTensorDesc out_desc = src_node->GetOpDesc()->GetOutputDesc(src_index);
+    out_desc.SetFormat(FORMAT_ND);
+    out_desc.SetOriginFormat(FORMAT_ND);
     GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return );
     is_input_const.push_back(PassUtils::IsConstant(src_node));
     ++iter;
@@ -101,7 +101,8 @@ Status NextIterationPass::FindWhileGroups() {
     const std::string &frame_name = loop_group_iter.first;
     for (const auto &enter_node : loop_group_iter.second->enter_nodes) {
       for (const auto &out_node : enter_node->GetOutAllNodes()) {
-        const string &type = out_node->GetType();
+        std::string type;
+        GE_CHK_STATUS_RET(GetOriginalType(out_node, type), "Get node type failed.");
         if ((type != MERGE) && (type != REFMERGE)) {
           continue;
         }
@@ -310,7 +311,8 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string
   }
   for (const auto &tmp_node : nodes) {
-    const std::string type = tmp_node->GetType();
+    std::string type;
+    GE_CHK_STATUS_RET(GetOriginalType(tmp_node, type), "Get node type failed.");
     if ((target_type == LOOPCOND) && (type == target_type)) {
       target_node = tmp_node;
       break;
@@ -35,9 +35,9 @@
 #include "graph/utils/op_desc_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/type_utils.h"
+#include "utils/node_utils.h"
 namespace ge {
 Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data,
                                               std::vector<GeTensorPtr> &v_output, const bool scalar_output) {
   Status ret = SUCCESS;
@@ -246,6 +246,12 @@ NodePtr PassUtils::GetInDataNode(const ConstNodePtr &node, int index) {
   return src_node;
 }
+NodePtr PassUtils::GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index) {
+  auto src_node = GetInDataNode(node, index);
+  return NodeUtils::GetInNodeCrossSubgraph(src_node);
+}
 bool PassUtils::IsNeedTrainIteFlowCtrl(const ComputeGraphPtr &compute_graph) {
   if (compute_graph == nullptr) {
     return false;
@@ -30,6 +30,8 @@ class PassUtils {
   static NodePtr GetInDataNode(const ConstNodePtr &node, int index);
+  static NodePtr GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index);
   static bool IsConstant(const ConstNodePtr &node);
   static Status SetOutNodeWeight(const OutDataAnchorPtr &out_data_anchor, const NodePtr &src_node);
@@ -279,7 +279,7 @@ Status SubexpressionMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra
       const auto &in_anchor = in_anchors.at(i);
       const auto &base_node = in_anchor->GetOwnerNode();
       GELOGD("Get Data direct node: %s", base_node->GetName().c_str());
-      if (!base_node->GetHostNode()) {
+      if (!base_node->GetHostNode() || base_node->GetType() == SWITCH) {
         continue;
       }
@@ -94,6 +94,12 @@ Status SwitchDeadBranchElimination::DeleteSwitchNode(NodePtr &node, NodePtr &pre
     GELOGE(FAILED, "parameter is null.");
     return FAILED;
   }
+  // If the two nodes are not in the same graph, use the switch node's direct in-node instead of pred_node.
+  if (node->GetOwnerComputeGraph() != pred_node->GetOwnerComputeGraph()) {
+    pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
+  }
   // link pred's in control nodes to switch
   if (GraphUtils::CopyInCtrlEdges(pred_node, node) != GRAPH_SUCCESS) {
     return FAILED;
@@ -131,7 +137,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
     return SUCCESS;
   }
-  auto pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
+  auto pred_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kPredInputIndex);
   if (pred_node == nullptr) {
     GELOGD("[%s] Pred input is null.", node->GetName().c_str());
     return SUCCESS;
@@ -143,7 +149,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
     return SUCCESS;
   }
-  auto input_node = PassUtils::GetInDataNode(node, kDataInputIndex);
+  auto input_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kDataInputIndex);
   if (input_node == nullptr) {
     GELOGD("[%s] Data input is null.", node->GetName().c_str());
     return SUCCESS;
@@ -448,6 +448,8 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph)
     // select first stream_switch
     NodePtr stream_switch = switch_list.front();
+    // set stream_label
+    GE_CHK_STATUS_RET(SetStreamLabel(stream_switch, cast_node->GetName()), "Set stream label failed.");
     OpDescPtr switch_desc = stream_switch->GetOpDesc();
     GE_CHECK_NOTNULL(switch_desc);
     switch_desc->SetName(CheckDuplicateName(cond_group + "/" + STREAMSWITCH + (true_branch_flag ? "_t" : "_f")));
@@ -23,6 +23,7 @@
 #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h"
 #include "common/formats/format_transfers/format_transfer_transpose.h"
 #include "common/formats/utils/formats_trans_utils.h"
+#include "common/util/error_manager/error_manager.h"
 #include "common/helper/model_helper.h"
 #include "common/math/math_util.h"
 #include "common/op/ge_op_utils.h"
@@ -1304,7 +1305,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input,
     auto format = desc.GetFormat();
     auto origin_format = desc.GetOriginFormat();
     // data maybe internal format [FRACTAL_NZ] at singleop process such as GEMM.
-    bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op);
+    auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER);
+    bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op) && (!tune_flag);
     if (need_check_internal_format) {
       bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format);
       if (is_internal) {
@@ -1346,19 +1348,22 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input,
       return FAILED;
     }
     ge::TensorUtils::SetSize(desc, shape_size);
-    graphStatus graph_ret = op->UpdateInputDesc(0, desc);
-    if (graph_ret != GRAPH_SUCCESS) {
-      GELOGE(graph_ret, "UpdateInputDesc fail, graph_ret:%u", graph_ret);
-      return graph_ret;
-    }
-    // Size will be recalculated in the build stage
-    ge::TensorUtils::SetSize(desc, 0);
-    graph_ret = op->UpdateOutputDesc(0, desc);
-    if (graph_ret != GRAPH_SUCCESS) {
-      GELOGE(graph_ret, "UpdateOutputDesc fail, graph_ret:%u", graph_ret);
-      return graph_ret;
+    if (!tune_flag) {
+      graphStatus graph_ret = op->UpdateInputDesc(0, desc);
+      if (graph_ret != GRAPH_SUCCESS) {
+        GELOGE(graph_ret, "UpdateInputDesc fail, graph_ret:%u", graph_ret);
+        return graph_ret;
+      }
+      // Size will be recalculated in the build stage
+      ge::TensorUtils::SetSize(desc, 0);
+      graph_ret = op->UpdateOutputDesc(0, desc);
+      if (graph_ret != GRAPH_SUCCESS) {
+        GELOGE(graph_ret, "UpdateOutputDesc fail, graph_ret:%u", graph_ret);
+        return graph_ret;
+      }
+    } else {
+      GELOGI("Data op %s skips updating its input/output desc in tune mode.", op->GetName().c_str());
     }
     if (!dynamic_shape_range_vec.empty()) {
       ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc);
       GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str());
@@ -1763,13 +1768,13 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {
     GeTensorDesc desc(user_input[index].GetTensorDesc());
     for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) {
-      if (desc.GetShape().GetDim(i) < 0) {
-        std::string situation = "data dim[" + std::to_string(i) + "][" +
-            std::to_string(desc.GetShape().GetDim(i)) + "]";
-        std::string reason = "it need >= 0";
-        ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
-        GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i,
-               desc.GetShape().GetDim(i));
+      int64_t dim = desc.GetShape().GetDim(i);
+      if (dim < UNKNOWN_DIM_NUM) {
+        std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]";
+        std::string reason = "it needs to be >= -2";
+        REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"situation", "reason"}),
+                           std::vector<std::string>({situation, reason}));
+        GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim] data dim %zu is not supported, need >= -2, real:%ld.", i, dim);
         return GE_GRAPH_INIT_FAILED;
       }
     }
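
The relaxed bound reflects the dynamic-shape convention assumed here: -1 marks a single unknown dimension and UNKNOWN_DIM_NUM (-2) marks an unknown rank, so anything below -2 is malformed. A minimal standalone validator in that spirit (the constants mirror the assumed convention, not GE's headers):

```cpp
#include <cstdint>
#include <vector>

constexpr int64_t kUnknownDim = -1;     // single dynamic dimension (assumed convention)
constexpr int64_t kUnknownDimNum = -2;  // unknown-rank sentinel (assumed convention)

// Returns true when every dim is a concrete size or one of the two sentinels.
bool DimsAreValid(const std::vector<int64_t> &dims) {
  for (int64_t dim : dims) {
    if (dim < kUnknownDimNum) {
      return false;  // e.g. -3 is not a legal placeholder
    }
  }
  return true;
}
```
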
@@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() {
   if (!convert_flag) {
     string error_msg = "Top name " + related_input_name + " convert rank failed, Please"
                        " ensure top name in aipp config is the top name of data node.";
-    GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+    GELOGE(PARAM_INVALID, "[Check][InputParam] %s", error_msg.c_str());
+    REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
     return PARAM_INVALID;
   }
@@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() {
     if (another_item->related_input_name().empty()) {
       string error_msg = "Can not both set related_input_name and related_input_rank!"
                          " Please ensure param is the same with the first aipp config(related_input_name).";
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+      GELOGE(PARAM_INVALID, "[Check][InputParam] %s", error_msg.c_str());
+      REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
       return PARAM_INVALID;
     }
     if (item->related_input_name() == another_item->related_input_name()) {
       string error_msg = "Can not insert aipp at the same position! Please ensure related_input_name"
                          " param is different in different aipp configs.";
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+      GELOGE(PARAM_INVALID, "[Check][InputParam] %s", error_msg.c_str());
+      REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
       return PARAM_INVALID;
     }
   }
@@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() {
     if (!another_item->related_input_name().empty()) {
       string error_msg = "Can not both set related_input_rank and related_input_name!"
                          " Please ensure param is the same with the first aipp config(related_input_rank).";
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+      GELOGE(PARAM_INVALID, "[Check][InputParam] %s", error_msg.c_str());
+      REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
       return PARAM_INVALID;
     }
     if (item->related_input_rank() == another_item->related_input_rank()) {
       string error_msg = "Can not insert aipp at the same position! Please ensure related_input_rank"
                          " param is different in different aipp configs.";
-      GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+      GELOGE(PARAM_INVALID, "[Check][InputParam] %s", error_msg.c_str());
+      REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
       return PARAM_INVALID;
     }
   }
@@ -212,7 +216,7 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) {
       }
     }
   }
-  GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
+  GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
                           PARAM_INVALID,
                           "Can not config part of outputs of Data node to support AIPP, config all "
                           "of the outputs of Data to support AIPP, or config none of them");
@@ -3,6 +3,7 @@ set(PROTO_LIST
 )
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+protobuf_generate(ge_atcstub PROTO_ATCSTUB_SRCS PROTO_ATCSTUB_HDRS ${PROTO_LIST})
 set(SRC_LIST
     "engine/host_cpu_engine.cc"
@@ -61,7 +62,7 @@ target_link_libraries(host_cpu_engine PRIVATE
 )
 ############ atcstub/libhost_cpu_engine.so ############
-add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
+add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_ATCSTUB_HDRS})
 target_compile_options(atc_host_cpu_engine PRIVATE
     -Werror
@@ -84,7 +85,7 @@ target_include_directories(atc_host_cpu_engine PRIVATE
     ${METADEF_DIR}/inc/external
     ${METADEF_DIR}/inc/external/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_atcstub
     #### yellow zone ####
    ${GE_CODE_DIR}/../inc
    #### blue zone ####
@@ -33,7 +33,7 @@ const int kNumOne = 1;
 } // namespace
 Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGeTensorPtr> &input,
                                    vector<GeTensorPtr> &v_output) {
-  GELOGI("ConcatOffsetKernel in.");
+  GELOGD("ConcatOffsetKernel in");
   if (op_desc_ptr == nullptr) {
     GELOGE(PARAM_INVALID, "input opdesc is nullptr.");
     return PARAM_INVALID;
@@ -41,7 +41,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con
   // validate attrs
   int N = 0;
   if (!(AttrUtils::GetInt(op_desc_ptr, "N", N))) {
-    GELOGW("Attr %s does not exist.", "N");
+    GELOGW("Attr %s does not exist", "N");
     return NOT_CHANGED;
   }
   // follow IR def, the first input is concat_dim
@@ -50,7 +50,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con
   int32_t concat_dim = *(const_cast<int32_t *>(reinterpret_cast<const int32_t *>(input_0->GetData().data())));
   // validate inputs
   if ((static_cast<int>(input.size()) != (N + kNumOne)) || (input.size() <= kConcatOffsetInputIndexOne)) {
-    GELOGW("The number of input for concat offset must be equal to %d, and must be more than one.", (N + kNumOne));
+    GELOGW("The number of input for concat offset must be equal to %d, and must be more than one", (N + kNumOne));
     return NOT_CHANGED;
   }
@@ -61,7 +61,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con
     GELOGW("Concat dim is bigger than the size of output_shape.");
     return NOT_CHANGED;
   }
-  GELOGI("Output shape size is %ld", output_size);
+  GELOGI("Output shape size is %ld.", output_size);
   int32_t offset = 0;
   if (output_size < 0) {
     GELOGE(FAILED, "Index is negative.");
@@ -86,7 +86,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con
   output_ptr->MutableTensorDesc().SetShape(output_shape);
   GE_IF_BOOL_EXEC(output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()),
                                       static_cast<size_t>(sizeof(DT_INT32) * output_size)) != GRAPH_SUCCESS,
-                  GELOGW("set data failed");
+                  GELOGW("set data failed.");
                   return NOT_CHANGED);
   v_output.push_back(output_ptr);
   // calculate offset
@@ -99,7 +99,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con
     }
     offset += input_dim;
   }
-  GELOGI("ConcatOffsetKernel success.");
+  GELOGD("ConcatOffsetKernel success");
   return SUCCESS;
 }
 REGISTER_KERNEL(CONCATOFFSET, ConcatOffsetKernel);
@@ -278,7 +278,7 @@ Status GatherV2Kernel::SaveIndicesByDataType(ConstGeTensorPtr indices_tensor_ptr
     auto indices_ptr = const_cast<int32_t *>(reinterpret_cast<const int32_t *>(indices_tensor_ptr->GetData().data()));
     for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) {
       if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) {
-        GELOGW("indices %ld value is not in range [0, %ld)", i, x_shape.GetDim(axis));
+        GELOGW("indices %ld value is not in range [0, %ld).", i, x_shape.GetDim(axis));
         return NOT_CHANGED;
       }
       indicates_.push_back(*(indices_ptr + i));
@@ -288,7 +288,7 @@ Status GatherV2Kernel::SaveIndicesByDataType(ConstGeTensorPtr indices_tensor_ptr
     auto indices_ptr = const_cast<int64_t *>(reinterpret_cast<const int64_t *>(indices_tensor_ptr->GetData().data()));
     for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) {
       if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) {
-        GELOGW("indices %ld value is not in range [0, %ld)", i, x_shape.GetDim(axis));
+        GELOGW("indices %ld value is not in range [0, %ld).", i, x_shape.GetDim(axis));
         return NOT_CHANGED;
       }
       indicates_.push_back(*(indices_ptr + i));
@@ -344,42 +344,42 @@ Status GatherV2Kernel::Check(const OpDescPtr &op_desc_ptr, const vector<ConstGeT | |||||
auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | ||||
bool is_valid_indices_data_type = indices_data_type == DT_INT32 || indices_data_type == DT_INT64; | bool is_valid_indices_data_type = indices_data_type == DT_INT32 || indices_data_type == DT_INT64; | ||||
if (!is_valid_indices_data_type) { | if (!is_valid_indices_data_type) { | ||||
GELOGW("indices datatype must be DT_INT32 or DT_INT64"); | |||||
GELOGW("indices datatype must be DT_INT32 or DT_INT64."); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
if (indices_shape.GetDimNum() > kMaxIndicatesDims) { | if (indices_shape.GetDimNum() > kMaxIndicatesDims) { | ||||
GELOGW("indices input only support 0 or 1 dims"); | |||||
GELOGW("indices input only support 0 or 1 dims."); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void GatherV2Kernel::DebugPrint(int64_t axis, const GeShape &x_shape, const GeShape &indices_shape, | void GatherV2Kernel::DebugPrint(int64_t axis, const GeShape &x_shape, const GeShape &indices_shape, | ||||
const std::vector<int64_t> &y_shape) { | const std::vector<int64_t> &y_shape) { | ||||
GELOGD("GatherV2Kernel axis:%ld x_shape:%zu indices_shape:%zu y_shape:%zu", axis, x_shape.GetDimNum(), | |||||
GELOGD("GatherV2Kernel axis:%ld x_shape:%zu indices_shape:%zu y_shape:%zu.", axis, x_shape.GetDimNum(), | |||||
indices_shape.GetDimNum(), y_shape.size()); | indices_shape.GetDimNum(), y_shape.size()); | ||||
for (size_t i = 0; i < x_shape.GetDimNum(); i++) { | for (size_t i = 0; i < x_shape.GetDimNum(); i++) { | ||||
GELOGD("GatherV2Kernel x_shape[%zu]: %ld", i, x_shape.GetDim(i)); | |||||
GELOGD("GatherV2Kernel x_shape[%zu]: %ld.", i, x_shape.GetDim(i)); | |||||
} | } | ||||
for (size_t i = 0; i < indices_shape.GetDimNum(); i++) { | for (size_t i = 0; i < indices_shape.GetDimNum(); i++) { | ||||
GELOGD("GatherV2Kernel indices_shape[%zu]: %ld", i, indices_shape.GetDim(i)); | |||||
GELOGD("GatherV2Kernel indices_shape[%zu]: %ld.", i, indices_shape.GetDim(i)); | |||||
} | } | ||||
for (size_t i = 0; i < y_shape.size(); i++) { | for (size_t i = 0; i < y_shape.size(); i++) { | ||||
GELOGD("GatherV2Kernel y_shape[%zu]: %ld", i, y_shape[i]); | |||||
GELOGD("GatherV2Kernel y_shape[%zu]: %ld.", i, y_shape[i]); | |||||
} | } | ||||
for (auto ele : indicates_) { | for (auto ele : indicates_) { | ||||
GELOGD("GatherV2Kernel indices:%ld", ele); | |||||
GELOGD("GatherV2Kernel indices:%ld.", ele); | |||||
} | } | ||||
} | } | ||||
 Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGeTensorPtr> &input,
                                vector<GeTensorPtr> &v_output) {
-  GELOGI("Enter GatherV2Kernel Process.");
+  GELOGI("Enter GatherV2Kernel Process");
   Status ret = Check(op_desc_ptr, input, v_output);
   if (ret != SUCCESS) {
-    GELOGW("param check failed.");
+    GELOGW("param check failed");
     return NOT_CHANGED;
   }
-  GELOGI("GatherV2Kernel[%s] start Process.", op_desc_ptr->GetName().c_str());
+  GELOGI("GatherV2Kernel[%s] start Process", op_desc_ptr->GetName().c_str());
   ConstGeTensorPtr tensor0 = input.at(kGatherV2InputIndexZero);
   ConstGeTensorPtr tensor1 = input.at(kGatherV2InputIndexOne);
   ConstGeTensorPtr tensor2 = input.at(kGatherV2InputIndexTwo);
@@ -394,7 +394,7 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe
   axis = axis >= 0 ? axis : axis + x_shape.GetDimNum();
   // check axis value
   if (axis < 0 || (axis + 1) > static_cast<int64_t>(x_shape.GetDimNum())) {
-    GELOGW("axis is invalid");
+    GELOGW("axis is invalid!");
     return NOT_CHANGED;
   }
   auto indices_data_type = tensor1->GetTensorDesc().GetDataType();
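The axis handling in the hunk above follows the usual negative-axis convention: a negative axis counts back from the rank, and anything still out of range after normalization is rejected. A minimal stand-alone sketch of the same normalize-then-validate step (assumed equivalent to the kernel's inline code, not the GE signature):

```cpp
#include <cstdint>
#include <cstdio>

// Normalize a possibly negative axis against a tensor rank, as above;
// returns false when the axis is out of range even after normalization.
bool NormalizeAxis(int64_t &axis, size_t rank) {
  axis = axis >= 0 ? axis : axis + static_cast<int64_t>(rank);
  return axis >= 0 && axis + 1 <= static_cast<int64_t>(rank);
}

int main() {
  int64_t a = -1;
  std::printf("%d axis=%ld\n", NormalizeAxis(a, 3), a);  // ok, axis=2
  a = 3;
  std::printf("%d axis=%ld\n", NormalizeAxis(a, 3), a);  // rejected: rank is 3
}
```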
@@ -407,7 +407,8 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe
   // check input data type
   auto x_data_type = tensor0->GetTensorDesc().GetDataType();
   if (supported_type.find(x_data_type) == supported_type.end()) {
-    GELOGI("GatherV2Kernel does not support this Data type:%s", TypeUtils::DataTypeToSerialString(x_data_type).c_str());
+    GELOGI("GatherV2Kernel does not support this Data type:%s.",
+           TypeUtils::DataTypeToSerialString(x_data_type).c_str());
     return NOT_CHANGED;
   }
   // calc output shape
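The "calc output shape" step elided after this hunk is, for a standard GatherV2, the concatenation of the x dims before the axis, the indices dims, and the x dims after the axis. A hedged sketch of that rule (helper name is mine, not the kernel's):

```cpp
#include <cstdint>
#include <vector>

// GatherV2 output shape: x dims before `axis`, then the indices dims,
// then x dims after `axis`. E.g. x=[4,5,6], indices=[2], axis=1 -> [4,2,6].
std::vector<int64_t> GatherV2OutShape(const std::vector<int64_t> &x,
                                      const std::vector<int64_t> &indices,
                                      size_t axis) {
  std::vector<int64_t> y(x.begin(), x.begin() + axis);
  y.insert(y.end(), indices.begin(), indices.end());
  y.insert(y.end(), x.begin() + axis + 1, x.end());
  return y;
}
```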
@@ -61,4 +61,5 @@ Status IdentityKernel::Compute(const ge::OpDescPtr op_desc, const std::vector<ge
   return SUCCESS;
 }
 REGISTER_KERNEL(IDENTITY, IdentityKernel);
+REGISTER_KERNEL(PLACEHOLDERWITHDEFAULT, IdentityKernel);
 } // namespace ge
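The one functional change in this hunk is the added registration: PLACEHOLDERWITHDEFAULT is routed through the existing IdentityKernel, which is reasonable because PlaceholderWithDefault forwards its default value unchanged once that value is a known constant. Conceptually the shared kernel is a pass-through; a minimal sketch with a hypothetical Tensor stand-in (not the GE API):

```cpp
#include <memory>
#include <vector>

struct Tensor {};  // hypothetical stand-in for ge::GeTensor
using TensorPtr = std::shared_ptr<Tensor>;

// An identity-style fold: every input constant is forwarded unchanged,
// which is also correct for PlaceholderWithDefault at compile time.
void IdentityFold(const std::vector<TensorPtr> &input,
                  std::vector<TensorPtr> &output) {
  output.assign(input.begin(), input.end());
}
```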
@@ -84,14 +84,14 @@ void GetOriginStrideVec(const std::vector<ge::ConstGeTensorPtr> &input, vector<i
 } // namespace
 Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<ge::ConstGeTensorPtr> &input,
                                    vector<ge::GeTensorPtr> &v_output) {
-  GELOGD("StridedSliceKernel in.");
+  GELOGD("StridedSliceKernel in");
   // 1.Check input and attrs
   if (CheckAndGetAttr(attr) != SUCCESS) {
-    GELOGW("Check and get attrs failed.Ignore kernel.");
+    GELOGW("Check and get attrs failed.Ignore kernel");
     return NOT_CHANGED;
   }
   if (CheckInputParam(input) != SUCCESS) {
-    GELOGW("Check input params failed.Ignore kernel.");
+    GELOGW("Check input params failed.Ignore kernel");
     return NOT_CHANGED;
   }
   // 2.Init param with mask attrs.
@@ -120,7 +120,7 @@ Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<g
   auto ret = OpUtils::SetOutputSliceData(data, static_cast<int64_t>(data_size), data_type, input_dims, begin_vec,
                                          output_dims, output_ptr.get(), stride_vec);
   if (ret != SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed.");
+    GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed");
     return NOT_CHANGED;
   }
@@ -133,7 +133,7 @@ Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<g
   GetOutputDims(final_dim_size, output_dims, v_dims);
   t_d.SetShape(GeShape(v_dims));
   v_output.push_back(output_ptr);
-  GELOGI("StridedSliceKernel success.");
+  GELOGI("StridedSliceKernel success");
   return SUCCESS;
 }
 Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) {
@@ -144,7 +144,7 @@ Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) {
   // Get all op attr value of strided_slice
   for (auto &attr_2_value : attr_value_map_) {
     if (!AttrUtils::GetInt(attr, attr_2_value.first, attr_2_value.second)) {
-      GELOGE(PARAM_INVALID, "Get %s attr failed.", attr_2_value.first.c_str());
+      GELOGE(PARAM_INVALID, "Get %s attr failed", attr_2_value.first.c_str());
       return PARAM_INVALID;
     }
   }
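attr_value_map_ is evidently pre-seeded with the attribute names this loop reads via AttrUtils::GetInt. The usual StridedSlice attribute set is the five TensorFlow-style bit masks; assuming those are the keys (not confirmed by this hunk), the map would look like:

```cpp
#include <cstdint>
#include <map>
#include <string>

// The five StridedSlice bit-mask attributes (TensorFlow-style names,
// assumed to match attr_value_map_'s keys). Bit i of each mask toggles
// per-dimension behaviour, e.g. begin_mask bit i means "ignore begin[i]".
std::map<std::string, int64_t> attr_value_map = {
    {"begin_mask", 0},    {"end_mask", 0},
    {"ellipsis_mask", 0}, {"new_axis_mask", 0},
    {"shrink_axis_mask", 0},
};
```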
@@ -182,7 +182,7 @@ Status StridedSliceKernel::CheckInputParam(const std::vector<ConstGeTensorPtr> &
     return PARAM_INVALID;
   }
   if (kIndexNumberType.find(begin_tensor_desc.GetDataType()) == kIndexNumberType.end()) {
-    GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64.");
+    GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64");
     return PARAM_INVALID;
   }
@@ -250,7 +250,7 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector<ConstGeTensorPtr
       end_i = x_dims.at(i);
       stride_i = 1;
     }
-    GELOGD("Before mask calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld.",
+    GELOGD("Before mask calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld",
            begin_i, end_i, stride_i, x_dims.at(i));
     auto ret = MaskCal(i, begin_i, end_i, x_dims.at(i));
     if (ret != SUCCESS) {
@@ -258,7 +258,7 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector<ConstGeTensorPtr
       return NOT_CHANGED;
     }
     int64_t dim_final;
-    GELOGD("Before stride calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld.",
+    GELOGD("Before stride calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld",
            begin_i, end_i, stride_i, x_dims.at(i));
     (void) StrideCal(x_dims.at(i), begin_i, end_i, stride_i, dim_final);
     output_dims.push_back(dim_final);
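StrideCal's body is not shown in this diff, but the conventional positive-stride slice length it presumably computes for each output dimension is ceil((end - begin) / stride). A stand-alone sketch under that assumption (function name is mine):

```cpp
#include <cstdint>
#include <cstdio>

// Conventional positive-stride slice length: ceil((end - begin) / stride),
// clamped at zero for empty slices. Assumed behaviour of StrideCal.
int64_t SliceLen(int64_t begin, int64_t end, int64_t stride) {
  if (stride <= 0 || end <= begin) return 0;
  return (end - begin + stride - 1) / stride;
}

int main() {
  std::printf("%ld\n", SliceLen(0, 5, 2));  // 3 -> elements 0, 2, 4
  std::printf("%ld\n", SliceLen(1, 4, 1));  // 3 -> elements 1, 2, 3
}
```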