You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 93 kB

3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
2 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
2 years ago
4 years ago
4 years ago
4 years ago
4 years ago
2 years ago
2 years ago
2 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
2 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
2 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
2 years ago
4 years ago
4 years ago
2 years ago
3 years ago
4 years ago
3 years ago
2 years ago
4 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
4 years ago
2 years ago
3 years ago
2 years ago
2 years ago
4 years ago
3 years ago
2 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
2 years ago
3 years ago
2 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
2 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
2 years ago
2 years ago
2 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
4 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
2 years ago
4 years ago
4 years ago
2 years ago
4 years ago
3 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
3 years ago
2 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
3 years ago
3 years ago
3 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831
  1. package models
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "path"
  7. "strconv"
  8. "strings"
  9. "time"
  10. "code.gitea.io/gitea/modules/util"
  11. "xorm.io/builder"
  12. "xorm.io/xorm"
  13. "code.gitea.io/gitea/modules/log"
  14. "code.gitea.io/gitea/modules/setting"
  15. "code.gitea.io/gitea/modules/timeutil"
  16. )
  // CloudbrainStatus is the string type used by the Job* status constants
  // (JobWaiting, JobStopped, JobSucceeded, JobFailed, JobRunning) and
  // ModelSafetyTesting.
  17. type CloudbrainStatus string
  // JobType is the string type used by the JobType* constants
  // (JobTypeDebug, JobTypeTrain, ...).
  18. type JobType string
  // ModelArtsJobStatus is the string type used by the ModelArts* notebook and
  // train-job status constants.
  19. type ModelArtsJobStatus string
  // Integer identifiers for the kind of cloudbrain backend. The first four are
  // assigned by iota (0 through 3); TypeCloudBrainAll is the explicit value -1.
  // NOTE(review): presumably these are the values stored in Cloudbrain.Type
  // and CloudbrainsOptions.Type - confirm against callers.
  20. const (
  21. TypeCloudBrainOne int = iota
  22. TypeCloudBrainTwo
  23. TypeC2Net // intelligent computing network
  24. TypeCDCenter // Chengdu intelligent computing center
  25. TypeCloudBrainAll = -1
  26. )
  // String constants for compute resources, notebook storage, job statuses and
  // job types.
  //
  // Several status strings are shared between the typed groups below: for
  // example "RUNNING", "FAILED" and "STOPPED" each appear as a
  // CloudbrainStatus, a ModelArtsJobStatus and an untyped Grampus constant.
  27. const (
  // Compute resource names compared against Cloudbrain.ComputeResource by
  // IsNPUTask, IsGPUTask and IsGCUTask.
  28. NPUResource = "NPU"
  29. GPUResource = "CPU/GPU"
  30. GCUResource = "GCU"
  31. AllResource = "all"
  32. // notebook storage category
  33. EVSCategory = "EVS"
  34. EFSCategory = "EFS"
  35. ManagedOwnership = "MANAGED"
  // NOTE(review): the name says "Detected" but the value is "DEDICATED" -
  // looks like a typo in the identifier; confirm before renaming.
  36. DetectedOwnership = "DEDICATED"
  37. NotebookFeature = "NOTEBOOK"
  38. DefaultFeature = "DEFAULT"
  // CloudbrainStatus values.
  39. JobWaiting CloudbrainStatus = "WAITING"
  40. JobStopped CloudbrainStatus = "STOPPED"
  41. JobSucceeded CloudbrainStatus = "SUCCEEDED"
  42. JobFailed CloudbrainStatus = "FAILED"
  43. JobRunning CloudbrainStatus = "RUNNING"
  44. ModelSafetyTesting CloudbrainStatus = "TESTING"
  // JobType values.
  45. JobTypeDebug JobType = "DEBUG"
  46. JobTypeBenchmark JobType = "BENCHMARK"
  47. JobTypeModelSafety JobType = "MODELSAFETY"
  48. JobTypeSnn4imagenet JobType = "SNN4IMAGENET"
  49. JobTypeBrainScore JobType = "BRAINSCORE"
  50. JobTypeSnn4Ecoset JobType = "SNN4ECOSET"
  51. JobTypeTrain JobType = "TRAIN"
  52. JobTypeInference JobType = "INFERENCE"
  53. // notebook
  54. ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" // queued for creation on free resources
  55. ModelArtsCreating ModelArtsJobStatus = "CREATING" // creating
  56. ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED" // creation failed
  57. ModelArtsStartQueuing ModelArtsJobStatus = "START_QUEUING" // queued for start on free resources
  58. ModelArtsReadyToStart ModelArtsJobStatus = "READY_TO_START" // free resources waiting to start
  59. ModelArtsStarting ModelArtsJobStatus = "STARTING" // starting
  60. ModelArtsRestarting ModelArtsJobStatus = "RESTARTING" // restarting
  61. ModelArtsStartFailed ModelArtsJobStatus = "START_FAILED" // start failed
  62. ModelArtsRunning ModelArtsJobStatus = "RUNNING" // running
  63. ModelArtsStopping ModelArtsJobStatus = "STOPPING" // stopping
  64. ModelArtsStopped ModelArtsJobStatus = "STOPPED" // stopped
  65. ModelArtsUnavailable ModelArtsJobStatus = "UNAVAILABLE" // faulty
  66. ModelArtsDeleting ModelArtsJobStatus = "DELETING" // deleting
  67. ModelArtsDeleted ModelArtsJobStatus = "DELETED" // deleted
  68. ModelArtsResizing ModelArtsJobStatus = "RESIZING" // flavor change in progress
  69. ModelArtsResizFailed ModelArtsJobStatus = "RESIZE_FAILED" // flavor change failed
  70. // trainjob
  71. ModelArtsTrainJobUnknown ModelArtsJobStatus = "UNKNOWN" // job status unknown
  72. ModelArtsTrainJobInit ModelArtsJobStatus = "INIT" // job initializing
  73. ModelArtsTrainJobImageCreating ModelArtsJobStatus = "IMAGE_CREATING" // job image is being created
  74. ModelArtsTrainJobImageFailed ModelArtsJobStatus = "IMAGE_FAILED" // job image creation failed
  75. ModelArtsTrainJobSubmitTrying ModelArtsJobStatus = "SUBMIT_TRYING" // job is being submitted
  76. ModelArtsTrainJobSubmitFailed ModelArtsJobStatus = "SUBMIT_FAILED" // job submission failed
  77. ModelArtsTrainJobDeleteFailed ModelArtsJobStatus = "DELETE_FAILED" // job deletion failed
  78. ModelArtsTrainJobWaiting ModelArtsJobStatus = "WAITING" // job is queued
  79. ModelArtsTrainJobRunning ModelArtsJobStatus = "RUNNING" // job is running
  80. ModelArtsTrainJobKilling ModelArtsJobStatus = "KILLING" // job is being cancelled
  81. ModelArtsTrainJobCompleted ModelArtsJobStatus = "COMPLETED" // job has completed
  82. ModelArtsTrainJobFailed ModelArtsJobStatus = "FAILED" // job run failed
  83. ModelArtsTrainJobKilled ModelArtsJobStatus = "KILLED" // job was cancelled successfully
  84. ModelArtsTrainJobCanceled ModelArtsJobStatus = "CANCELED" // job cancelled
  85. ModelArtsTrainJobLost ModelArtsJobStatus = "LOST" // job lost
  86. ModelArtsTrainJobScaling ModelArtsJobStatus = "SCALING" // job is scaling up
  87. ModelArtsTrainJobSubmitModelFailed ModelArtsJobStatus = "SUBMIT_MODEL_FAILED" // model submission failed
  88. ModelArtsTrainJobDeployServiceFailed ModelArtsJobStatus = "DEPLOY_SERVICE_FAILED" // service deployment failed
  89. ModelArtsTrainJobCheckInit ModelArtsJobStatus = "CHECK_INIT" // review job initializing
  90. ModelArtsTrainJobCheckRunning ModelArtsJobStatus = "CHECK_RUNNING" // review job is running
  91. ModelArtsTrainJobCheckRunningCompleted ModelArtsJobStatus = "CHECK_RUNNING_COMPLETED" // review job has completed
  92. ModelArtsTrainJobCheckFailed ModelArtsJobStatus = "CHECK_FAILED" // review job failed
  // DURATION_STR_ZERO is what ConvertDurationToStr returns for a non-positive
  // duration; it is also the xorm default of Cloudbrain.TrainJobDuration.
  93. DURATION_STR_ZERO = "00:00:00"
  94. CloudbrainKeyDuration = 24 * time.Hour
  95. // grampus
  // NOTE(review): "pending" is lowercase while every other Grampus status is
  // uppercase - confirm this matches what the Grampus API returns.
  96. GrampusStatusPending = "pending"
  97. GrampusStatusRunning = "RUNNING"
  98. GrampusStatusFailed = "FAILED"
  99. GrampusStatusSucceeded = "SUCCEEDED"
  100. GrampusStatusStopped = "STOPPED"
  101. GrampusStatusStopping = "STOPPING"
  102. GrampusStatusUnknown = "UNKNOWN"
  103. GrampusStatusWaiting = "WAITING"
  104. ModelSuffix = "models.zip"
  105. )
  // Untyped string names for clusters, AI centers and compute resources.
  // Note that NPU and GCU have the same values as NPUResource and GCUResource,
  // while GPU ("GPU") differs from GPUResource ("CPU/GPU").
  106. const (
  107. // cluster
  108. OpenICluster = "OpenI"
  109. C2NetCluster = "C2Net"
  110. // AI center
  111. AICenterOfCloudBrainOne = "OpenIOne"
  112. AICenterOfCloudBrainTwo = "OpenITwo"
  113. AICenterOfChengdu = "OpenIChengdu"
  114. // ComputeResource
  115. GPU = "GPU"
  116. NPU = "NPU"
  117. GCU = "GCU"
  118. )
  // Cloudbrain is the xorm-mapped record of a single cloudbrain task.
  //
  // Fields tagged `xorm:"-"` (CanDebug, CanDel, CanModify, User, Repo, the
  // BenchmarkType* strings and Spec) are excluded from the table mapping and
  // must be filled in by the caller. DeletedAt carries the xorm "deleted" tag.
  119. type Cloudbrain struct {
  120. ID int64 `xorm:"pk autoincr"`
  121. JobID string `xorm:"INDEX NOT NULL"`
  122. JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
  123. JobName string
  124. DisplayJobName string
  125. Status string
  126. UserID int64 `xorm:"INDEX NOT NULL"`
  127. RepoID int64 `xorm:"INDEX NOT NULL"`
  128. SubTaskName string
  129. ContainerID string
  130. ContainerIp string
  131. CreatedUnix timeutil.TimeStamp `xorm:"INDEX"`
  132. UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  133. Duration int64 `xorm:"DEFAULT 0"` // run duration, in seconds
  134. TrainJobDuration string `xorm:"DEFAULT '00:00:00'"`
  135. Image string // image name
  136. GpuQueue string // GPU type, i.e. the GPU queue
  137. ResourceSpecId int // GPU spec ID
  138. DeletedAt time.Time `xorm:"deleted"`
  139. CanDebug bool `xorm:"-"`
  140. CanDel bool `xorm:"-"`
  141. CanModify bool `xorm:"-"`
  142. Type int `xorm:"INDEX"`
  143. BenchmarkTypeID int
  144. BenchmarkChildTypeID int
  145. CardType string
  146. Cluster string
  147. VersionID int64 // version ID
  148. VersionName string `xorm:"INDEX"` // current version
  149. Uuid string // dataset ID
  150. DatasetName string `xorm:"varchar(2000)"`
  151. VersionCount int // current number of versions of the task, excluding deleted ones
  152. IsLatestVersion string // whether this is the latest version: 1 = yes, 0 = no
  153. CommitID string // commit ID of the submitted repository code
  154. PreVersionName string // parent version name
  155. ComputeResource string // compute resource, e.g. npu
  156. EngineID int64 // engine ID
  157. ImageID string //grampus image_id
  158. AiCenter string //grampus ai center: center_id+center_name
  159. TrainUrl string // OBS path of the output model
  160. BranchName string `xorm:"varchar(2550)"` // branch name
  161. Parameters string // parameters passed to ModelArts
  162. BootFile string `xorm:"varchar(2550)"` // boot file
  163. DataUrl string `xorm:"varchar(3500)"` // OBS path of the dataset
  164. LogUrl string // OBS path of the log output
  165. PreVersionId int64 // version ID of the parent version
  166. FlavorCode string // flavor (spec) ID on ModelArts
  167. Description string `xorm:"varchar(2550)"` // description
  168. WorkServerNumber int // number of nodes
  169. FlavorName string // flavor (spec) name
  170. EngineName string // engine name
  171. TotalVersionCount int // total number of versions of the task, including deleted ones
  172. LabelName string // label name
  173. ModelName string // model name
  174. ModelVersion string // model version
  175. CkptName string // checkpoint (weights) file name
  176. PreTrainModelUrl string // pre-trained model address
  177. ResultUrl string // OBS path of the inference result
  178. ResultJson string `xorm:"varchar(4000)"`
  179. User *User `xorm:"-"`
  180. Repo *Repository `xorm:"-"`
  181. BenchmarkType string `xorm:"-"` // algorithm evaluation, model evaluation
  182. BenchmarkTypeName string `xorm:"-"`
  183. BenchmarkTypeRankLink string `xorm:"-"`
  // StartTime and EndTime are the inputs of ComputeAndSetDuration; a zero
  // value is treated there as "not set".
  184. StartTime timeutil.TimeStamp
  185. EndTime timeutil.TimeStamp
  186. Cleared bool `xorm:"DEFAULT false"`
  187. Spec *Specification `xorm:"-"`
  188. }
  // CloudbrainShow is the reduced view of a Cloudbrain task produced by
  // Cloudbrain.ToShow. Duration holds the formatted TrainJobDuration string,
  // not a number of seconds. ToShow does not populate AiCenter.
  189. type CloudbrainShow struct {
  190. ID int64
  191. JobID string
  192. RepoFullName string
  193. Type int
  194. JobType string
  195. DisplayJobName string
  196. Duration string
  197. ResourceSpec *Specification
  198. ComputeResource string
  199. AiCenter string
  200. WorkServerNumber int
  201. }
  // CloudbrainShow4Action is a smaller task view than CloudbrainShow: ID,
  // JobID, Type, JobType, DisplayJobName and ComputeResource only.
  // NOTE(review): presumably used when rendering action/activity entries -
  // confirm against callers.
  202. type CloudbrainShow4Action struct {
  203. ID int64
  204. JobID string
  205. Type int
  206. JobType string
  207. DisplayJobName string
  208. ComputeResource string
  209. }
  210. func (task *Cloudbrain) ToShow() *CloudbrainShow {
  211. n := 1
  212. if task.WorkServerNumber > 1 {
  213. n = task.WorkServerNumber
  214. }
  215. c := &CloudbrainShow{
  216. ID: task.ID,
  217. JobID: task.JobID,
  218. JobType: task.JobType,
  219. Type: task.Type,
  220. DisplayJobName: task.DisplayJobName,
  221. Duration: task.TrainJobDuration,
  222. ResourceSpec: task.Spec,
  223. ComputeResource: task.ComputeResource,
  224. WorkServerNumber: n,
  225. }
  226. if task.Repo != nil {
  227. c.RepoFullName = task.Repo.FullName()
  228. }
  229. return c
  230. }
  231. func (task *Cloudbrain) ComputeAndSetDuration() {
  232. var d int64
  233. if task.StartTime == 0 {
  234. d = 0
  235. } else if task.EndTime == 0 {
  236. if !task.IsTerminal() {
  237. d = time.Now().Unix() - task.StartTime.AsTime().Unix()
  238. }
  239. } else {
  240. d = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix()
  241. }
  242. if d < 0 {
  243. d = 0
  244. }
  245. task.Duration = d
  246. task.TrainJobDuration = ConvertDurationToStr(d)
  247. }
  248. func (task *Cloudbrain) CorrectCreateUnix() {
  249. if task.StartTime > 0 && task.CreatedUnix > task.StartTime {
  250. task.CreatedUnix = task.StartTime
  251. }
  252. }
  253. func (task *Cloudbrain) IsTerminal() bool {
  254. status := task.Status
  255. return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) ||
  256. status == string(ModelArtsTrainJobKilled) || status == string(ModelArtsStopped) ||
  257. status == string(JobStopped) || status == string(JobFailed) ||
  258. status == string(JobSucceeded) || status == GrampusStatusFailed ||
  259. status == GrampusStatusSucceeded || status == GrampusStatusStopped
  260. }
  261. func (task *Cloudbrain) IsRunning() bool {
  262. status := task.Status
  263. return status == string(ModelArtsTrainJobRunning) || status == string(ModelArtsRunning) ||
  264. status == string(JobRunning) || status == GrampusStatusRunning
  265. }
  266. func (task *Cloudbrain) IsUserHasRight(user *User) bool {
  267. if user == nil {
  268. return false
  269. }
  270. return user.IsAdmin || user.ID == task.UserID
  271. }
  272. func (task *Cloudbrain) IsGPUTask() bool {
  273. return task.ComputeResource == GPUResource
  274. }
  275. func (task *Cloudbrain) IsGCUTask() bool {
  276. return task.ComputeResource == GCUResource
  277. }
  278. func (task *Cloudbrain) IsNPUTask() bool {
  279. return task.ComputeResource == NPUResource
  280. }
  281. func ConvertDurationToStr(duration int64) string {
  282. if duration <= 0 {
  283. return DURATION_STR_ZERO
  284. }
  285. return util.AddZero(duration/3600) + ":" + util.AddZero(duration%3600/60) + ":" + util.AddZero(duration%60)
  286. }
  287. func ConvertStrToDuration(trainJobDuration string) int64 {
  288. trainJobDurationList := strings.Split(trainJobDuration, ":")
  289. if len(trainJobDurationList) == 3 {
  290. i, _ := strconv.ParseInt(trainJobDurationList[0], 10, 64)
  291. j, _ := strconv.ParseInt(trainJobDurationList[1], 10, 64)
  292. k, _ := strconv.ParseInt(trainJobDurationList[2], 10, 64)
  293. return i*3600 + j*60 + k
  294. } else {
  295. return 0
  296. }
  297. }
  298. func IsTrainJobTerminal(status string) bool {
  299. return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || status == string(ModelArtsTrainJobKilled) || status == GrampusStatusFailed || status == GrampusStatusStopped || status == GrampusStatusSucceeded
  300. }
  301. func IsModelArtsDebugJobTerminal(status string) bool {
  302. return status == string(ModelArtsStopped)
  303. }
  304. func IsCloudBrainOneDebugJobTerminal(status string) bool {
  305. return status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded)
  306. }
  307. func IsModelBenchMarkJobType(jobType string) bool {
  308. return jobType == string(JobTypeSnn4imagenet) || jobType == string(JobTypeBrainScore) || jobType == string(JobTypeSnn4Ecoset)
  309. }
  310. func ParseAndSetDurationFromCloudBrainOne(result JobResultPayload, task *Cloudbrain) {
  311. isActivated := result.JobStatus.CreatedTime > 0
  312. if task.StartTime == 0 && isActivated {
  313. task.StartTime = timeutil.TimeStamp(result.JobStatus.CreatedTime / 1000)
  314. }
  315. if task.EndTime == 0 && IsCloudBrainOneDebugJobTerminal(task.Status) && isActivated {
  316. if result.JobStatus.CompletedTime > 0 {
  317. task.EndTime = timeutil.TimeStamp(result.JobStatus.CompletedTime / 1000)
  318. }
  319. }
  320. task.CorrectCreateUnix()
  321. task.ComputeAndSetDuration()
  322. }
  323. func ParseAndSetDurationFromModelArtsNotebook(result *GetNotebook2Result, job *Cloudbrain) {
  324. if job.StartTime == 0 && result.Lease.UpdateTime > 0 {
  325. job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
  326. }
  327. job.Status = result.Status
  328. if job.EndTime == 0 && IsModelArtsDebugJobTerminal(job.Status) {
  329. job.EndTime = timeutil.TimeStampNow()
  330. }
  331. job.CorrectCreateUnix()
  332. job.ComputeAndSetDuration()
  333. }
  // CloudbrainInfo embeds a Cloudbrain and a User, both tagged
  // `xorm:"extends"`.
  // NOTE(review): presumably the result row of a cloudbrain/user join -
  // confirm against the queries that fill it.
  334. type CloudbrainInfo struct {
  335. Cloudbrain `xorm:"extends"`
  336. User `xorm:"extends"`
  337. }
  // CloudBrainLoginResult holds a Code, a Msg and a free-form Payload map.
  // Unlike the other *Result types in this file, its fields carry no json
  // tags.
  // NOTE(review): presumably the decoded response of a login call - confirm.
  338. type CloudBrainLoginResult struct {
  339. Code string
  340. Msg string
  341. Payload map[string]interface{}
  342. }
  // TaskRole describes one task role of a job; it is the element type of
  // CreateJobParams.TaskRoles and is serialized with the camelCase JSON names
  // given in the tags.
  343. type TaskRole struct {
  344. Name string `json:"name"`
  345. TaskNumber int `json:"taskNumber"`
  346. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  347. MinFailedTaskCount int `json:"minFailedTaskCount"`
  348. CPUNumber int `json:"cpuNumber"`
  349. GPUNumber int `json:"gpuNumber"`
  350. MemoryMB int `json:"memoryMB"`
  351. ShmMB int `json:"shmMB"`
  352. Command string `json:"command"`
  353. NeedIBDevice bool `json:"needIBDevice"`
  354. IsMainRole bool `json:"isMainRole"`
  355. UseNNI bool `json:"useNNI"`
  356. }
  // StHostPath is the JSON "hostPath" object of a Volume: a path, the mount
  // path and a read-only flag.
  357. type StHostPath struct {
  358. Path string `json:"path"`
  359. MountPath string `json:"mountPath"`
  360. ReadOnly bool `json:"readOnly"`
  361. }
  // Volume wraps a single StHostPath under the JSON key "hostPath"; it is the
  // element type of CreateJobParams.Volumes.
  362. type Volume struct {
  363. HostPath StHostPath `json:"hostPath"`
  364. }
  // CreateJobParams is the JSON-serializable description of a job to create:
  // name, retry count, GPU type, image, task roles and volumes.
  // NOTE(review): presumably the request body of the cloudbrain create-job
  // API - confirm against the client code.
  365. type CreateJobParams struct {
  366. JobName string `json:"jobName"`
  367. RetryCount int8 `json:"retryCount"`
  368. GpuType string `json:"gpuType"`
  369. Image string `json:"image"`
  370. TaskRoles []TaskRole `json:"taskRoles"`
  371. Volumes []Volume `json:"volumes"`
  372. }
  // CreateJobResult is a code/msg/payload JSON envelope whose payload is a
  // free-form map.
  373. type CreateJobResult struct {
  374. Code string `json:"code"`
  375. Msg string `json:"msg"`
  376. Payload map[string]interface{} `json:"payload"`
  377. }
  // QueueDetailResult is a code/msg/payload JSON envelope whose payload maps a
  // string key to a QueueDetail.
  // NOTE(review): the key is presumably the queue name - confirm.
  378. type QueueDetailResult struct {
  379. Code string `json:"code"`
  380. Msg string `json:"msg"`
  381. Payload map[string]QueueDetail `json:"payload"`
  382. }
  // QueueDetail wraps a JobScheduleInfo under the JSON key "JobScheduleInfo".
  383. type QueueDetail struct {
  384. JobScheduleInfo JobScheduleInfo `json:"JobScheduleInfo"`
  385. }
  // JobScheduleInfo holds the Pending and Running counts and the median
  // pending-job duration in seconds. Its JSON keys are capitalized, unlike
  // the camelCase keys used elsewhere in this file.
  386. type JobScheduleInfo struct {
  387. Pending int `json:"Pending"`
  388. Running int `json:"Running"`
  389. MedianPendingJobDurationSec int `json:"MedianPendingJobDurationSec"`
  390. }
  391. type GetJobResult struct {
  392. Code string `json:"code"`
  393. Msg string `json:"msg"`
  394. Payload map[string]interface{} `json:"payload"`
  395. }
  396. type GetImagesResult struct {
  397. Code string `json:"code"`
  398. Msg string `json:"msg"`
  399. Payload GetImagesPayload `json:"payload"`
  400. }
  401. type GetImagesPayload struct {
  402. Count int `json:"count"`
  403. TotalPages int `json:"totalPages,omitempty"`
  404. ImageInfo []*ImageInfo `json:"rows"`
  405. }
  406. type CloudbrainsOptions struct {
  407. ListOptions
  408. RepoID int64 // include all repos if empty
  409. UserID int64
  410. JobID string
  411. SortType string
  412. CloudbrainIDs []int64
  413. JobStatus []string
  414. JobStatusNot bool
  415. Keyword string
  416. Type int
  417. JobTypes []string
  418. VersionName string
  419. IsLatestVersion string
  420. JobTypeNot bool
  421. NeedRepoInfo bool
  422. RepoIDList []int64
  423. BeginTime time.Time
  424. EndTime time.Time
  425. ComputeResource string
  426. BeginTimeUnix int64
  427. EndTimeUnix int64
  428. AiCenter string
  429. NeedDeleteInfo string
  430. Cluster string
  431. AccCardType string
  432. AccCardsNum int
  433. WorkServerNumber int
  434. }
  435. type TaskPod struct {
  436. TaskRoleStatus struct {
  437. Name string `json:"name"`
  438. } `json:"taskRoleStatus"`
  439. //TaskStatuses []struct {
  440. // TaskIndex int `json:"taskIndex"`
  441. // PodUID string `json:"podUid"`
  442. // PodIP string `json:"podIp"`
  443. // PodName string `json:"podName"`
  444. // ContainerID string `json:"containerId"`
  445. // ContainerIP string `json:"containerIp"`
  446. // ContainerGpus string `json:"containerGpus"`
  447. // State string `json:"state"`
  448. // StartAt time.Time `json:"startAt"`
  449. // FinishedAt time.Time `json:"finishedAt"`
  450. // ExitCode int `json:"exitCode"`
  451. // ExitDiagnostics string `json:"exitDiagnostics"`
  452. // RetriedCount int `json:"retriedCount"`
  453. // StartTime string
  454. // FinishedTime string
  455. //} `json:"taskStatuses"`
  456. TaskStatuses []TaskStatuses `json:"taskStatuses"`
  457. }
  458. type TaskStatuses struct {
  459. TaskIndex int `json:"taskIndex"`
  460. PodUID string `json:"podUid"`
  461. PodIP string `json:"podIp"`
  462. PodName string `json:"podName"`
  463. ContainerID string `json:"containerId"`
  464. ContainerIP string `json:"containerIp"`
  465. ContainerGpus string `json:"containerGpus"`
  466. State string `json:"state"`
  467. StartAt time.Time `json:"startAt"`
  468. FinishedAt time.Time `json:"finishedAt"`
  469. ExitCode int `json:"exitCode"`
  470. ExitDiagnostics string `json:"exitDiagnostics"`
  471. RetriedCount int `json:"retriedCount"`
  472. StartTime string
  473. FinishedTime string
  474. }
  475. type TaskInfo struct {
  476. Username string `json:"username"`
  477. TaskName string `json:"task_name"`
  478. CodeName string `json:"code_name"`
  479. BenchmarkCategory []string `json:"selected_category"`
  480. CodeLink string `json:"code_link"`
  481. GpuType string `json:"gpu_type"`
  482. }
  483. func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  484. data, _ := json.Marshal(input)
  485. var taskPod TaskPod
  486. err := json.Unmarshal(data, &taskPod)
  487. taskPod.TaskStatuses[0].StartTime = time.Unix(taskPod.TaskStatuses[0].StartAt.Unix()+8*3600, 0).UTC().Format("2006-01-02 15:04:05")
  488. taskPod.TaskStatuses[0].FinishedTime = time.Unix(taskPod.TaskStatuses[0].FinishedAt.Unix()+8*3600, 0).UTC().Format("2006-01-02 15:04:05")
  489. //if the task is not finished or stopped,the cloudbrain renturns 0001-01-01 08:00:00, the finishedTime shows with -
  490. if strings.HasPrefix(taskPod.TaskStatuses[0].FinishedTime, "0001") {
  491. taskPod.TaskStatuses[0].FinishedTime = "-"
  492. }
  493. return taskPod, err
  494. }
  495. type JobResultPayload struct {
  496. ID string `json:"id"`
  497. Name string `json:"name"`
  498. Platform string `json:"platform"`
  499. JobStatus struct {
  500. Username string `json:"username"`
  501. State string `json:"state"`
  502. SubState string `json:"subState"`
  503. ExecutionType string `json:"executionType"`
  504. Retries int `json:"retries"`
  505. CreatedTime int64 `json:"createdTime"`
  506. CompletedTime int64 `json:"completedTime"`
  507. AppID string `json:"appId"`
  508. AppProgress string `json:"appProgress"`
  509. AppTrackingURL string `json:"appTrackingUrl"`
  510. AppLaunchedTime int64 `json:"appLaunchedTime"`
  511. AppCompletedTime interface{} `json:"appCompletedTime"`
  512. AppExitCode int `json:"appExitCode"`
  513. AppExitDiagnostics string `json:"appExitDiagnostics"`
  514. AppExitType interface{} `json:"appExitType"`
  515. VirtualCluster string `json:"virtualCluster"`
  516. StartTime string
  517. EndTime string
  518. } `json:"jobStatus"`
  519. TaskRoles map[string]interface{} `json:"taskRoles"`
  520. Resource struct {
  521. CPU int `json:"cpu"`
  522. Memory string `json:"memory"`
  523. NvidiaComGpu int `json:"nvidia.com/gpu"`
  524. } `json:"resource"`
  525. Config struct {
  526. Image string `json:"image"`
  527. JobID string `json:"jobId"`
  528. GpuType string `json:"gpuType"`
  529. JobName string `json:"jobName"`
  530. JobType string `json:"jobType"`
  531. TaskRoles []struct {
  532. Name string `json:"name"`
  533. ShmMB int `json:"shmMB"`
  534. Command string `json:"command"`
  535. MemoryMB int `json:"memoryMB"`
  536. CPUNumber int `json:"cpuNumber"`
  537. GpuNumber int `json:"gpuNumber"`
  538. IsMainRole bool `json:"isMainRole"`
  539. TaskNumber int `json:"taskNumber"`
  540. NeedIBDevice bool `json:"needIBDevice"`
  541. MinFailedTaskCount int `json:"minFailedTaskCount"`
  542. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  543. } `json:"taskRoles"`
  544. RetryCount int `json:"retryCount"`
  545. } `json:"config"`
  546. Userinfo struct {
  547. User string `json:"user"`
  548. OrgID string `json:"org_id"`
  549. } `json:"userinfo"`
  550. }
  551. func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  552. data, _ := json.Marshal(input)
  553. var jobResultPayload JobResultPayload
  554. err := json.Unmarshal(data, &jobResultPayload)
  555. jobResultPayload.JobStatus.StartTime = time.Unix(jobResultPayload.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05")
  556. jobResultPayload.JobStatus.EndTime = time.Unix(jobResultPayload.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05")
  557. if jobResultPayload.JobStatus.State == string(JobWaiting) {
  558. jobResultPayload.JobStatus.StartTime = "-"
  559. jobResultPayload.JobStatus.EndTime = "-"
  560. }
  561. return jobResultPayload, err
  562. }
  563. type ImagesResultPayload struct {
  564. Images []struct {
  565. ID int `json:"id"`
  566. Name string `json:"name"`
  567. Place string `json:"place"`
  568. Description string `json:"description"`
  569. Provider string `json:"provider"`
  570. Createtime string `json:"createtime"`
  571. Remark string `json:"remark"`
  572. } `json:"taskStatuses"`
  573. }
  574. type ImageInfo struct {
  575. ID int `json:"id"`
  576. Name string `json:"name"`
  577. Place string `json:"place"`
  578. Description string `json:"description"`
  579. Provider string `json:"provider"`
  580. Createtime string `json:"createtime"`
  581. Remark string `json:"remark"`
  582. IsPublic int `json:"isPublic"`
  583. PlaceView string
  584. }
  585. type Categories struct {
  586. Category []*Category `json:"category"`
  587. }
  588. type Category struct {
  589. Id int `json:"id"`
  590. Value string `json:"value"`
  591. }
  592. type BenchmarkTypes struct {
  593. BenchmarkType []*BenchmarkType `json:"type"`
  594. }
  595. type BenchmarkType struct {
  596. Id int `json:"id"`
  597. RankLink string `json:"rank_link"`
  598. First string `json:"first"` //一级算法类型名称
  599. Second []*BenchmarkDataset `json:"second"`
  600. }
  601. type BenchmarkDataset struct {
  602. Id int `json:"id"`
  603. Value string `json:"value"` //二级算法类型名称
  604. Attachment string `json:"attachment"` //数据集的uuid
  605. Owner string `json:"owner"` //评估脚本所在仓库的拥有者
  606. RepoName string `json:"repo_name"` //评估脚本所在仓库的名称
  607. }
  608. type GpuInfos struct {
  609. GpuInfo []*GpuInfo `json:"gpu_type"`
  610. }
  611. type GpuInfo struct {
  612. Id int `json:"id"`
  613. Value string `json:"value"`
  614. Queue string `json:"queue"`
  615. }
  616. type ResourceSpecs struct {
  617. ResourceSpec []*ResourceSpec `json:"resorce_specs"`
  618. }
  619. type ResourceSpec struct {
  620. Id int `json:"id"`
  621. CpuNum int `json:"cpu"`
  622. GpuNum int `json:"gpu"`
  623. MemMiB int `json:"memMiB"`
  624. ShareMemMiB int `json:"shareMemMiB"`
  625. UnitPrice int64 `json:"unitPrice"`
  626. }
  627. type FlavorInfos struct {
  628. FlavorInfo []*FlavorInfo `json:"flavor_info"`
  629. }
  630. type FlavorInfo struct {
  631. Id int `json:"id"`
  632. Value string `json:"value"`
  633. Desc string `json:"desc"`
  634. UnitPrice int64 `json:"unitPrice"`
  635. }
  636. type SpecialPools struct {
  637. Pools []*SpecialPool `json:"pools"`
  638. }
  639. type SpecialPool struct {
  640. Org string `json:"org"`
  641. Type string `json:"type"`
  642. IsExclusive bool `json:"isExclusive"`
  643. Pool []*GpuInfo `json:"pool"`
  644. JobType []string `json:"jobType"`
  645. ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
  646. Flavor []*setting.FlavorInfo `json:"flavor"`
  647. }
  648. type PoolInfos struct {
  649. PoolInfo []*PoolInfo `json:"pool_info"`
  650. }
  651. type PoolInfo struct {
  652. PoolId string `json:"pool_id"`
  653. PoolName string `json:"pool_name"`
  654. PoolType string `json:"pool_type"`
  655. }
  656. type CommitImageCloudBrainParams struct {
  657. Ip string `json:"ip"`
  658. TaskContainerId string `json:"taskContainerId"`
  659. ImageTag string `json:"imageTag"`
  660. ImageDescription string `json:"imageDescription"`
  661. }
  662. type CommitImageParams struct {
  663. CommitImageCloudBrainParams
  664. IsPrivate bool
  665. Topics []string
  666. CloudBrainType int
  667. UID int64
  668. Place string
  669. Type int
  670. }
  671. type CommitImageResult struct {
  672. Code string `json:"code"`
  673. Msg string `json:"msg"`
  674. Payload map[string]interface{} `json:"payload"`
  675. }
  676. type GetJobLogParams struct {
  677. Size string `json:"size"`
  678. Sort string `json:"sort"`
  679. QueryInfo QueryInfo `json:"query"`
  680. }
  681. type QueryInfo struct {
  682. MatchInfo MatchInfo `json:"match"`
  683. }
  684. type MatchInfo struct {
  685. PodName string `json:"kubernetes.pod.name"`
  686. }
  687. type GetJobLogResult struct {
  688. ScrollID string `json:"_scroll_id"`
  689. Took int `json:"took"`
  690. TimedOut bool `json:"timed_out"`
  691. Shards struct {
  692. Total int `json:"total"`
  693. Successful int `json:"successful"`
  694. Skipped int `json:"skipped"`
  695. Failed int `json:"failed"`
  696. } `json:"_shards"`
  697. Hits struct {
  698. Hits []Hits `json:"hits"`
  699. } `json:"hits"`
  700. }
  701. type Hits struct {
  702. Index string `json:"_index"`
  703. Type string `json:"_type"`
  704. ID string `json:"_id"`
  705. Source struct {
  706. Message string `json:"message"`
  707. } `json:"_source"`
  708. Sort []int `json:"sort"`
  709. }
  710. type GetAllJobLogParams struct {
  711. Scroll string `json:"scroll"`
  712. ScrollID string `json:"scroll_id"`
  713. }
  714. type DeleteJobLogTokenParams struct {
  715. ScrollID string `json:"scroll_id"`
  716. }
  717. type DeleteJobLogTokenResult struct {
  718. Succeeded bool `json:"succeeded"`
  719. NumFreed int `json:"num_freed"`
  720. }
  721. type CloudBrainResult struct {
  722. Code string `json:"code"`
  723. Msg string `json:"msg"`
  724. }
  725. type CreateNotebook2Params struct {
  726. JobName string `json:"name"`
  727. Description string `json:"description"`
  728. Duration int64 `json:"duration"` //ms
  729. Feature string `json:"feature"`
  730. PoolID string `json:"pool_id"`
  731. Flavor string `json:"flavor"`
  732. ImageID string `json:"image_id"`
  733. WorkspaceID string `json:"workspace_id"`
  734. Volume VolumeReq `json:"volume"`
  735. }
  736. type CreateNotebookWithoutPoolParams struct {
  737. JobName string `json:"name"`
  738. Description string `json:"description"`
  739. Duration int64 `json:"duration"` //ms
  740. Feature string `json:"feature"`
  741. Flavor string `json:"flavor"`
  742. ImageID string `json:"image_id"`
  743. WorkspaceID string `json:"workspace_id"`
  744. Volume VolumeReq `json:"volume"`
  745. }
  746. type VolumeReq struct {
  747. Capacity int `json:"capacity"`
  748. Category string `json:"category"`
  749. Ownership string `json:"ownership"`
  750. Uri string `json:"uri"`
  751. }
  752. type CreateNotebookParams struct {
  753. JobName string `json:"name"`
  754. Description string `json:"description"`
  755. ProfileID string `json:"profile_id"`
  756. Flavor string `json:"flavor"`
  757. Spec Spec `json:"spec"`
  758. Workspace Workspace `json:"workspace"`
  759. Pool Pool `json:"pool"`
  760. }
  761. type Pool struct {
  762. ID string `json:"id"`
  763. Name string `json:"name"`
  764. Type string `json:"type"`
  765. }
  766. type Workspace struct {
  767. ID string `json:"id"`
  768. }
  769. type Spec struct {
  770. Storage Storage `json:"storage"`
  771. AutoStop AutoStop `json:"auto_stop"`
  772. }
  773. type AutoStop struct {
  774. Enable bool `json:"enable"`
  775. Duration int `json:"duration"`
  776. }
  777. type Storage struct {
  778. Type string `json:"type"`
  779. Location Location `json:"location"`
  780. }
  781. type Location struct {
  782. Path string `json:"path"`
  783. }
  784. type NotebookResult struct {
  785. ErrorCode string `json:"error_code"`
  786. ErrorMsg string `json:"error_msg"`
  787. }
  788. type CreateNotebookResult struct {
  789. ErrorCode string `json:"error_code"`
  790. ErrorMsg string `json:"error_msg"`
  791. ID string `json:"id"`
  792. Name string `json:"name"`
  793. Description string `json:"description"`
  794. Status string `json:"status"`
  795. CreationTimestamp string `json:"creation_timestamp"`
  796. LatestUpdateTimestamp string `json:"latest_update_timestamp"`
  797. Profile struct {
  798. ID string `json:"id"`
  799. Name string `json:"name"`
  800. Description string `json:"description"`
  801. DeType string `json:"de_type"`
  802. FlavorType string `json:"flavor_type"`
  803. } `json:"profile"`
  804. Flavor string `json:"flavor"`
  805. FlavorDetails struct {
  806. Name string `json:"name"`
  807. Status string `json:"status"`
  808. QueuingNum int `json:"queuing_num"`
  809. QueueLeftTime int `json:"queue_left_time"` //s
  810. Duration int `json:"duration"` //auto_stop_time s
  811. } `json:"flavor_details"`
  812. }
  813. type GetNotebookResult struct {
  814. ErrorCode string `json:"error_code"`
  815. ErrorMsg string `json:"error_msg"`
  816. ID string `json:"id"`
  817. Name string `json:"name"`
  818. Description string `json:"description"`
  819. Status string `json:"status"`
  820. CreationTimestamp string `json:"creation_timestamp"`
  821. CreateTime string
  822. LatestUpdateTimestamp string `json:"latest_update_timestamp"`
  823. LatestUpdateTime string
  824. Profile struct {
  825. ID string `json:"id"`
  826. Name string `json:"name"`
  827. Description string `json:"description"`
  828. DeType string `json:"de_type"`
  829. FlavorType string `json:"flavor_type"`
  830. } `json:"profile"`
  831. Flavor string `json:"flavor"`
  832. FlavorDetails struct {
  833. Name string `json:"name"`
  834. Status string `json:"status"`
  835. QueuingNum int `json:"queuing_num"`
  836. QueueLeftTime int `json:"queue_left_time"` //s
  837. Duration int `json:"duration"` //auto_stop_time s
  838. } `json:"flavor_details"`
  839. QueuingInfo struct {
  840. ID string `json:"id"`
  841. Name string `json:"name"`
  842. Flavor string `json:"flavor"`
  843. DeType string `json:"de_type"`
  844. Status string `json:"status"`
  845. BeginTimestamp int `json:"begin_timestamp"` //time of instance begin in queue
  846. BeginTime string
  847. RemainTime int `json:"remain_time"` //remain time of instance
  848. EndTimestamp int `json:"end_timestamp"` //
  849. EndTime string
  850. Rank int `json:"rank"` //rank of instance in queue
  851. } `json:"queuing_info"`
  852. Spec struct {
  853. Annotations struct {
  854. TargetDomain string `json:"target_domain"`
  855. Url string `json:"url"`
  856. } `json:"annotations"`
  857. } `json:"spec"`
  858. }
  859. type GetNotebook2Result struct {
  860. ErrorCode string `json:"error_code"`
  861. ErrorMsg string `json:"error_msg"`
  862. FailReason string `json:"fail_reason"`
  863. ID string `json:"id"`
  864. Name string `json:"name"`
  865. Description string `json:"description"`
  866. Status string `json:"status"`
  867. Url string `json:"url"` //实例访问的URL
  868. Token string `json:"token"` //notebook鉴权使用的token信息
  869. Flavor string `json:"flavor"`
  870. CreateTime string
  871. LatestUpdateTime string
  872. CreateAt int64 `json:"create_at"` //实例创建的时间,UTC毫秒
  873. UpdateAt int64 `json:"update_at"` //实例最后更新(不包括保活心跳)的时间,UTC毫秒
  874. Image struct {
  875. Name string `json:"name"`
  876. Status string `json:"status"`
  877. QueuingNum int `json:"queuing_num"`
  878. QueueLeftTime int `json:"queue_left_time"` //s
  879. Duration int `json:"duration"` //auto_stop_time s
  880. } `json:"image"`
  881. Lease struct {
  882. CreateTime int64 `json:"create_at"` //实例创建的时间,UTC毫秒
  883. Duration int64 `json:"duration"` //实例运行时长,以创建时间为起点计算,即“创建时间+duration > 当前时刻”时,系统会自动停止实例
  884. UpdateTime int64 `json:"update_at"` //实例最后更新(不包括保活心跳)的时间,UTC毫秒
  885. } `json:"lease"` //实例自动停止的倒计时信息
  886. VolumeRes struct {
  887. Capacity int `json:"capacity"`
  888. Category string `json:"category"`
  889. MountPath string `json:"mount_path"`
  890. Ownership string `json:"ownership"`
  891. Status string `json:"status"`
  892. } `json:"volume"`
  893. }
  894. type GetTokenParams struct {
  895. Auth Auth `json:"auth"`
  896. }
  897. type Auth struct {
  898. Identity Identity `json:"identity"`
  899. Scope Scope `json:"scope"`
  900. }
  901. type Scope struct {
  902. Project Project `json:"project"`
  903. }
  904. type Project struct {
  905. Name string `json:"name"`
  906. }
  907. type Identity struct {
  908. Methods []string `json:"methods"`
  909. Password Password `json:"password"`
  910. }
  911. type Password struct {
  912. User NotebookUser `json:"user"`
  913. }
  914. type NotebookUser struct {
  915. Name string `json:"name"`
  916. Password string `json:"password"`
  917. Domain Domain `json:"domain"`
  918. }
  919. type Domain struct {
  920. Name string `json:"name"`
  921. }
  922. const (
  923. ActionStart = "start"
  924. ActionStop = "stop"
  925. ActionRestart = "restart"
  926. ActionQueue = "queue"
  927. ActionDequeue = "dequeue"
  928. )
  929. type NotebookAction struct {
  930. Action string `json:"action"`
  931. }
  932. type NotebookActionResult struct {
  933. ErrorCode string `json:"error_code"`
  934. ErrorMsg string `json:"error_msg"`
  935. CurrentStatus string `json:"current_status"`
  936. PreviousState string `json:"previous_state"`
  937. Status string `json:"status"`
  938. }
  939. type NotebookGetJobTokenResult struct {
  940. ErrorCode string `json:"error_code"`
  941. ErrorMsg string `json:"error_msg"`
  942. Token string `json:"token"`
  943. }
  944. type NotebookDelResult struct {
  945. NotebookResult
  946. InstanceID string `json:"instance_id"`
  947. }
  948. type CreateUserImageTrainJobParams struct {
  949. JobName string `json:"job_name"`
  950. Description string `json:"job_desc"`
  951. Config UserImageConfig `json:"config"`
  952. WorkspaceID string `json:"workspace_id"`
  953. }
  954. type UserImageConfig struct {
  955. WorkServerNum int `json:"worker_server_num"`
  956. AppUrl string `json:"app_url"` //训练作业的代码目录
  957. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  958. Parameter []Parameter `json:"parameter"`
  959. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  960. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  961. LogUrl string `json:"log_url"`
  962. UserImageUrl string `json:"user_image_url"`
  963. UserCommand string `json:"user_command"`
  964. CreateVersion bool `json:"create_version"`
  965. Flavor Flavor `json:"flavor"`
  966. PoolID string `json:"pool_id"`
  967. ShareAddr string `json:"nas_share_addr"`
  968. MountPath string `json:"nas_mount_path"`
  969. NasType string `json:"nas_type"`
  970. }
  971. type CreateTrainJobParams struct {
  972. JobName string `json:"job_name"`
  973. Description string `json:"job_desc"`
  974. Config Config `json:"config"`
  975. WorkspaceID string `json:"workspace_id"`
  976. }
  977. type Config struct {
  978. WorkServerNum int `json:"worker_server_num"`
  979. AppUrl string `json:"app_url"` //训练作业的代码目录
  980. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  981. Parameter []Parameter `json:"parameter"`
  982. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  983. EngineID int64 `json:"engine_id"`
  984. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  985. LogUrl string `json:"log_url"`
  986. //UserImageUrl string `json:"user_image_url"`
  987. //UserCommand string `json:"user_command"`
  988. CreateVersion bool `json:"create_version"`
  989. Flavor Flavor `json:"flavor"`
  990. PoolID string `json:"pool_id"`
  991. ShareAddr string `json:"nas_share_addr"`
  992. MountPath string `json:"nas_mount_path"`
  993. NasType string `json:"nas_type"`
  994. }
  995. type CreateInferenceJobParams struct {
  996. JobName string `json:"job_name"`
  997. Description string `json:"job_desc"`
  998. InfConfig InfConfig `json:"config"`
  999. WorkspaceID string `json:"workspace_id"`
  1000. }
  1001. type CreateInfUserImageParams struct {
  1002. JobName string `json:"job_name"`
  1003. Description string `json:"job_desc"`
  1004. Config InfUserImageConfig `json:"config"`
  1005. WorkspaceID string `json:"workspace_id"`
  1006. }
  1007. type InfConfig struct {
  1008. WorkServerNum int `json:"worker_server_num"`
  1009. AppUrl string `json:"app_url"` //训练作业的代码目录
  1010. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  1011. Parameter []Parameter `json:"parameter"`
  1012. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  1013. EngineID int64 `json:"engine_id"`
  1014. LogUrl string `json:"log_url"`
  1015. CreateVersion bool `json:"create_version"`
  1016. Flavor Flavor `json:"flavor"`
  1017. PoolID string `json:"pool_id"`
  1018. }
  1019. type InfUserImageConfig struct {
  1020. WorkServerNum int `json:"worker_server_num"`
  1021. AppUrl string `json:"app_url"` //训练作业的代码目录
  1022. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  1023. Parameter []Parameter `json:"parameter"`
  1024. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  1025. EngineID int64 `json:"engine_id"`
  1026. LogUrl string `json:"log_url"`
  1027. CreateVersion bool `json:"create_version"`
  1028. Flavor Flavor `json:"flavor"`
  1029. PoolID string `json:"pool_id"`
  1030. UserImageUrl string `json:"user_image_url"`
  1031. UserCommand string `json:"user_command"`
  1032. }
  1033. type CreateTrainJobVersionParams struct {
  1034. Description string `json:"job_desc"`
  1035. Config TrainJobVersionConfig `json:"config"`
  1036. }
  1037. type CreateTrainJobVersionUserImageParams struct {
  1038. Description string `json:"job_desc"`
  1039. Config TrainJobVersionUserImageConfig `json:"config"`
  1040. }
  1041. type TrainJobVersionConfig struct {
  1042. WorkServerNum int `json:"worker_server_num"`
  1043. AppUrl string `json:"app_url"` //训练作业的代码目录
  1044. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  1045. Parameter []Parameter `json:"parameter"`
  1046. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  1047. EngineID int64 `json:"engine_id"`
  1048. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  1049. LogUrl string `json:"log_url"`
  1050. Flavor Flavor `json:"flavor"`
  1051. PoolID string `json:"pool_id"`
  1052. PreVersionId int64 `json:"pre_version_id"`
  1053. ShareAddr string `json:"nas_share_addr"`
  1054. MountPath string `json:"nas_mount_path"`
  1055. NasType string `json:"nas_type"`
  1056. }
  1057. type TrainJobVersionUserImageConfig struct {
  1058. WorkServerNum int `json:"worker_server_num"`
  1059. AppUrl string `json:"app_url"` //训练作业的代码目录
  1060. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  1061. Parameter []Parameter `json:"parameter"`
  1062. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  1063. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  1064. LogUrl string `json:"log_url"`
  1065. Flavor Flavor `json:"flavor"`
  1066. PoolID string `json:"pool_id"`
  1067. PreVersionId int64 `json:"pre_version_id"`
  1068. UserImageUrl string `json:"user_image_url"`
  1069. UserCommand string `json:"user_command"`
  1070. ShareAddr string `json:"nas_share_addr"`
  1071. MountPath string `json:"nas_mount_path"`
  1072. NasType string `json:"nas_type"`
  1073. }
  1074. type CreateConfigParams struct {
  1075. ConfigName string `json:"config_name"`
  1076. Description string `json:"config_desc"`
  1077. WorkServerNum int `json:"worker_server_num"`
  1078. AppUrl string `json:"app_url"` //训练作业的代码目录
  1079. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  1080. Parameter []Parameter `json:"parameter"`
  1081. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  1082. EngineID int64 `json:"engine_id"`
  1083. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  1084. LogUrl string `json:"log_url"`
  1085. Flavor Flavor `json:"flavor"`
  1086. PoolID string `json:"pool_id"`
  1087. Volumes []Volumes `json:"volumes"`
  1088. }
  1089. type Parameter struct {
  1090. Label string `json:"label"`
  1091. Value string `json:"value"`
  1092. }
  1093. type Parameters struct {
  1094. Parameter []Parameter `json:"parameter"`
  1095. }
  1096. type Datasurl struct {
  1097. DatasetUrl string `json:"dataset_url"`
  1098. DatasetName string `json:"dataset_name"`
  1099. }
  1100. type DatasetDownload struct {
  1101. DatasetName string `json:"dataset_name"`
  1102. DatasetDownloadLink string `json:"dataset_download_link"`
  1103. RepositoryLink string `json:"repository_link"`
  1104. IsDelete bool `json:"is_delete"`
  1105. }
  1106. type ModelDownload struct {
  1107. Name string `json:"name"`
  1108. DownloadLink string `json:"download_link"`
  1109. RepositoryLink string `json:"repository_link"`
  1110. IsDelete bool `json:"is_delete"`
  1111. }
  1112. type DataSource struct {
  1113. DatasetID string `json:"dataset_id"`
  1114. DatasetVersion string `json:"dataset_version"`
  1115. Type string `json:"type"`
  1116. DataUrl string `json:"data_url"`
  1117. }
  1118. type Volumes struct {
  1119. Nfs Nfs `json:"nfs"`
  1120. HostPath HostPath `json:"host_path"`
  1121. }
  1122. type Nfs struct {
  1123. ID string `json:"id"`
  1124. SourcePath string `json:"src_path"`
  1125. DestPath string `json:"dest_path"`
  1126. ReadOnly bool `json:"read_only"`
  1127. }
  1128. type HostPath struct {
  1129. SourcePath string `json:"src_path"`
  1130. DestPath string `json:"dest_path"`
  1131. ReadOnly bool `json:"read_only"`
  1132. }
  1133. type Flavor struct {
  1134. Code string `json:"code"`
  1135. }
  1136. type CreateTrainJobResult struct {
  1137. ErrorCode string `json:"error_code"`
  1138. ErrorMsg string `json:"error_msg"`
  1139. IsSuccess bool `json:"is_success"`
  1140. JobName string `json:"job_name"`
  1141. JobID int64 `json:"job_id"`
  1142. Status int `json:"status"`
  1143. CreateTime int64 `json:"create_time"`
  1144. VersionID int64 `json:"version_id"`
  1145. ResourceID string `json:"resource_id"`
  1146. VersionName string `json:"version_name"`
  1147. }
  1148. type CreateTrainJobConfigResult struct {
  1149. ErrorCode string `json:"error_code"`
  1150. ErrorMsg string `json:"error_msg"`
  1151. IsSuccess bool `json:"is_success"`
  1152. }
  1153. type GetResourceSpecsResult struct {
  1154. ErrorCode string `json:"error_code"`
  1155. ErrorMsg string `json:"error_msg"`
  1156. IsSuccess bool `json:"is_success"`
  1157. SpecTotalCount int `json:"spec_total_count"`
  1158. Specs []Specs `json:"specs"`
  1159. }
  1160. type Specs struct {
  1161. Core string `json:"core"`
  1162. Cpu string `json:"cpu"`
  1163. IsNoResource bool `json:"no_resource"`
  1164. GpuType string `json:"gpu_type"`
  1165. SpecID int64 `json:"spec_id"`
  1166. GpuNum int `json:"gpu_num"`
  1167. SpecCode string `json:"spec_code"`
  1168. Storage string `json:"storage"`
  1169. MaxNum int `json:"max_num"`
  1170. UnitNum int `json:"unit_num"`
  1171. InterfaceType int `json:"interface_type"`
  1172. }
  1173. type GetConfigListResult struct {
  1174. ErrorCode string `json:"error_code"`
  1175. ErrorMsg string `json:"error_msg"`
  1176. IsSuccess bool `json:"is_success"`
  1177. ConfigTotalCount int `json:"config_total_count"`
  1178. ParaConfigs []ParaConfig `json:"configs"`
  1179. }
  1180. type ParaConfig struct {
  1181. ConfigName string `json:"config_name"`
  1182. ConfigDesc string `json:"config_desc"`
  1183. CreateTime int64 `json:"create_time"`
  1184. EngineType int `json:"engine_type"`
  1185. EngineName string `json:"engine_name"`
  1186. EngineId int64 `json:"engine_id"`
  1187. EngineVersion string `json:"engine_version"`
  1188. UserImageUrl string `json:"user_image_url"`
  1189. UserCommand string `json:"user_command"`
  1190. Result GetConfigResult
  1191. }
  1192. type GetConfigResult struct {
  1193. ErrorCode string `json:"error_code"`
  1194. ErrorMsg string `json:"error_msg"`
  1195. IsSuccess bool `json:"is_success"`
  1196. ConfigName string `json:"config_name"`
  1197. Description string `json:"config_desc"`
  1198. WorkServerNum int `json:"worker_server_num"`
  1199. AppUrl string `json:"app_url"` //训练作业的代码目录
  1200. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  1201. Parameter []Parameter `json:"parameter"`
  1202. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  1203. EngineID int64 `json:"engine_id"`
  1204. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  1205. LogUrl string `json:"log_url"`
  1206. Flavor Flavor `json:"flavor"`
  1207. PoolID string `json:"pool_id"`
  1208. }
  1209. type ErrorResult struct {
  1210. ErrorCode string `json:"error_code"`
  1211. ErrorMsg string `json:"error_message"`
  1212. IsSuccess bool `json:"is_success"`
  1213. }
// GetTrainJobResult describes a single training job (one version) as decoded
// from a JSON response. Status, CreateTime, TrainJobDuration and DatasetName
// carry no json tag; presumably they are derived by callers from the raw
// values (IntStatus, LongCreateTime, Duration) — verify against callers.
type GetTrainJobResult struct {
	IsSuccess bool `json:"is_success"`
	JobName string `json:"job_name"`
	JobID int64 `json:"job_id"`
	Description string `json:"job_desc"`
	IntStatus int `json:"status"`
	Status string
	LongCreateTime int64 `json:"create_time"`
	CreateTime string
	Duration int64 `json:"duration"` // running time of the training job, in milliseconds
	TrainJobDuration string // running time of the training job, formatted as hh:mm:ss
	VersionID int64 `json:"version_id"`
	ResourceID string `json:"resource_id"`
	VersionName string `json:"version_name"`
	PreVersionID int64 `json:"pre_version_id"`
	WorkServerNum int `json:"worker_server_num"`
	AppUrl string `json:"app_url"` // code directory of the training job
	BootFileUrl string `json:"boot_file_url"` // boot file of the training job; must be located under the code directory
	Parameter []Parameter `json:"parameter"`
	DataUrl string `json:"data_url"` // OBS path URL of the dataset required by the training job
	EngineID int64 `json:"engine_id"`
	EngineName string `json:"engine_name"`
	EngineVersion string `json:"engine_version"`
	TrainUrl string `json:"train_url"` // OBS path URL for the training job's output files
	LogUrl string `json:"log_url"`
	Flavor Flavor `json:"flavor"`
	PoolID string `json:"pool_id"`
	PoolName string `json:"pool_name"`
	NasMountPath string `json:"nas_mount_path"`
	NasShareAddr string `json:"nas_share_addr"`
	DatasetName string
	ModelMetricList string `json:"model_metric_list"` // the list contains f1_score, recall, precision and accuracy, when available
	StartTime int64 `json:"start_time"` // start time of the training job
}
// GetTrainJobLogResult is the JSON shape of a training-job log query: the
// error envelope fields plus a chunk of log Content, its line count and the
// start/end line markers (both strings).
type GetTrainJobLogResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg string `json:"error_msg"`
	IsSuccess bool `json:"is_success"`
	Content string `json:"content"`
	Lines int `json:"lines"`
	StartLine string `json:"start_line"`
	EndLine string `json:"end_line"`
}
// GetTrainJobLogFileNamesResult is the JSON shape of a log-file-name listing:
// the error envelope fields plus the names under "log_file_list".
type GetTrainJobLogFileNamesResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg string `json:"error_msg"`
	IsSuccess bool `json:"is_success"`
	LogFileList []string `json:"log_file_list"`
}
// TrainJobResult is a bare success/error envelope for training-job calls.
// It is embedded by GetTrainJobMetricStatisticResult.
type TrainJobResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg string `json:"error_msg"`
	IsSuccess bool `json:"is_success"`
}
// LogFile identifies a log file by name only.
type LogFile struct {
	Name string
}
// JobList is one entry of GetTrainJobListResult.JobList (JSON key "jobs").
// Despite the name it describes a single job.
type JobList struct {
	JobName string `json:"job_name"`
	JobID int64 `json:"job_id"`
	VersionID int64 `json:"version_id"`
	VersionCount int64 `json:"version_count"`
	Description string `json:"job_desc"`
	IntStatus int `json:"status"`
}
// GetTrainJobListResult is the JSON shape of a training-job listing; it
// embeds ErrorResult for the error envelope.
type GetTrainJobListResult struct {
	ErrorResult
	JobTotalCount int `json:"job_total_count"` // total number of jobs created by the user that were found
	JobCountLimit int `json:"job_count_limit"` // number of training jobs the user can still create
	Quotas int `json:"quotas"` // upper limit on the number of running training jobs
	JobList []JobList `json:"jobs"`
}
// JobVersionList is one entry of GetTrainJobVersionListResult.JobVersionList
// (JSON key "versions"). Despite the name it describes a single version.
type JobVersionList struct {
	VersionName string `json:"version_name"`
	VersionID int64 `json:"version_id"`
	IntStatus int `json:"status"`
}
// GetTrainJobVersionListResult is the JSON shape of a version listing for one
// training job; it embeds ErrorResult for the error envelope.
type GetTrainJobVersionListResult struct {
	ErrorResult
	JobID int64 `json:"job_id"`
	JobName string `json:"job_name"`
	JobDesc string `json:"job_desc"`
	VersionCount int64 `json:"version_count"`
	JobVersionList []JobVersionList `json:"versions"`
}
// NotebookList is one entry of GetNotebookListResult.NotebookList (JSON key
// "data"). Despite the name it describes a single notebook: JobName and JobID
// are read from the JSON keys "name" and "id".
type NotebookList struct {
	JobName string `json:"name"`
	JobID string `json:"id"`
	Status string `json:"status"`
}
// GetNotebookListResult is the JSON shape of a paged notebook listing.
type GetNotebookListResult struct {
	TotalCount int64 `json:"total"` // total number of records
	CurrentPage int `json:"current"` // current page number
	TotalPages int `json:"pages"` // total number of pages
	Size int `json:"size"` // number of items per page
	NotebookList []NotebookList `json:"data"`
}
// Grampus

// GrampusResult is the error envelope embedded in the Grampus response types
// below. Unlike the ModelArts-style results above, the code is an int and the
// JSON keys are camelCase.
type GrampusResult struct {
	ErrorCode int `json:"errorCode"`
	ErrorMsg string `json:"errorMsg"`
}
// GrampusJobInfo describes a Grampus job together with its tasks, as carried
// under the "otJob" key of the job responses. The timestamp fields are int64;
// their unit is not established in this file — presumably Unix seconds,
// TODO confirm.
type GrampusJobInfo struct {
	StartedAt int64 `json:"startedAt"`
	RunSec int64 `json:"runSec"`
	CompletedAt int64 `json:"completedAt"`
	CreatedAt int64 `json:"createdAt"`
	UpdatedAt int64 `json:"updatedAt"`
	Desc string `json:"desc"`
	JobID string `json:"id"`
	Name string `json:"name"`
	Status string `json:"status"`
	UserID string `json:"userId"`
	Tasks []GrampusTasks `json:"tasks"`
}
// GrampusNotebookInfo mirrors GrampusJobInfo field for field, except that
// Tasks holds GrampusNotebookTask entries instead of GrampusTasks.
type GrampusNotebookInfo struct {
	StartedAt int64 `json:"startedAt"`
	RunSec int64 `json:"runSec"`
	CompletedAt int64 `json:"completedAt"`
	CreatedAt int64 `json:"createdAt"`
	UpdatedAt int64 `json:"updatedAt"`
	Desc string `json:"desc"`
	JobID string `json:"id"`
	Name string `json:"name"`
	Status string `json:"status"`
	UserID string `json:"userId"`
	Tasks []GrampusNotebookTask `json:"tasks"`
}
// Center is an (id, name) pair; it is used as the element type of
// GrampusSpec.Centers.
type Center struct {
	ID string `json:"id"`
	Name string `json:"name"`
}
// GrampusSpec is one resource specification returned under "resourceSpecs":
// its identity, processor type, the centers it lists, and the hardware
// details in SpecInfo.
type GrampusSpec struct {
	CreatedAt int64 `json:"createdAt"`
	UpdatedAt int64 `json:"updatedAt"`
	ID string `json:"id"`
	Name string `json:"name"`
	ProcessorType string `json:"processorType"`
	Centers []Center `json:"centers"`
	SpecInfo SpecInfo `json:"specInfo"`
}
// GrampusAiCenter is one AI center returned under "aiCenterInfos", with its
// accelerator devices and resource entries.
type GrampusAiCenter struct {
	AccDevices []GrampusAccDevice `json:"accDevices"`
	Id string `json:"id"`
	Name string `json:"name"`
	Resource []GrampusCenterResource `json:"resource"`
}
// GrampusAccDevice describes an accelerator card available in an AI center.
type GrampusAccDevice struct {
	Kind string `json:"kind"` // accelerator card kind: npu.huawei.com/NPU, nvidia.com/gpu, cambricon.com/mlu
	Model string `json:"model"` // accelerator card model
}
// GrampusCenterResource is a named resource of an AI center with its
// allocated and capacity amounts, both carried as strings.
type GrampusCenterResource struct {
	Allocated string `json:"allocated"`
	Capacity string `json:"capacity"`
	Name string `json:"name"`
}
// SpecInfo holds the hardware details of a GrampusSpec: accelerator kind,
// memory, model and count, CPU core count and memory size. The memory fields
// are strings; their format is not established in this file.
type SpecInfo struct {
	AccDeviceKind string `json:"accDeviceKind"`
	AccDeviceMemory string `json:"accDeviceMemory"`
	AccDeviceModel string `json:"accDeviceModel"`
	AccDeviceNum int `json:"accDeviceNum"`
	CpuCoreNum int `json:"cpuCoreNum"`
	MemorySize string `json:"memorySize"`
}
// GetGrampusResourceSpecsResult is the response of a resource-spec listing:
// the Grampus error envelope plus the specs under "resourceSpecs".
type GetGrampusResourceSpecsResult struct {
	GrampusResult
	Infos []GrampusSpec `json:"resourceSpecs"`
}
// GetGrampusAiCentersResult is the response of an AI-center listing: the
// Grampus error envelope, the centers under "aiCenterInfos" and a total size.
type GetGrampusAiCentersResult struct {
	GrampusResult
	Infos []GrampusAiCenter `json:"aiCenterInfos"`
	TotalSize int `json:"totalSize"`
}
// GrampusImage is one image entry returned under "images".
type GrampusImage struct {
	CreatedAt int64 `json:"createdAt"`
	UpdatedAt int64 `json:"updatedAt"`
	ID string `json:"id"`
	Name string `json:"name"`
	ProcessorType string `json:"processorType"`
}
// GetGrampusImagesResult is the response of an image listing: the Grampus
// error envelope, a total size and the images under "images".
type GetGrampusImagesResult struct {
	GrampusResult
	TotalSize int `json:"totalSize"`
	Infos []GrampusImage `json:"images"`
}
// CreateGrampusJobResponse is the response of a job creation: the Grampus
// error envelope plus the job under "otJob".
type CreateGrampusJobResponse struct {
	GrampusResult
	JobInfo GrampusJobInfo `json:"otJob"`
}
// GetGrampusJobResponse is the response of a job query: the Grampus error
// envelope, the job under "otJob" and an "exitDiagnostics" string.
type GetGrampusJobResponse struct {
	GrampusResult
	JobInfo GrampusJobInfo `json:"otJob"`
	ExitDiagnostics string `json:"exitDiagnostics"`
}
// GrampusNotebookResponse is the notebook counterpart of the job responses:
// the Grampus error envelope plus the notebook job under "otJob".
type GrampusNotebookResponse struct {
	GrampusResult
	JobInfo GrampusNotebookInfo `json:"otJob"`
}
// GrampusNotebookRestartResponse is the response of a notebook restart: the
// Grampus error envelope plus the "newId" and "status" fields.
type GrampusNotebookRestartResponse struct {
	GrampusResult
	NewId string `json:"newId"`
	Status string `json:"status"`
}
// GrampusStopJobResponse is the response of a stop request: the Grampus error
// envelope plus the stop timestamp, job id and resulting status.
type GrampusStopJobResponse struct {
	GrampusResult
	StoppedAt int64 `json:"stoppedAt"`
	ID string `json:"id"`
	Status string `json:"status"`
}
// GrampusTasks describes a single task of a Grampus job (despite the plural
// name). It is used both in CreateGrampusJobRequest.Tasks and in
// GrampusJobInfo.Tasks. Datasets, Models and Code all reuse GrampusDataset.
type GrampusTasks struct {
	Command string `json:"command"`
	Name string `json:"name"`
	ImageId string `json:"imageId"`
	ResourceSpecId string `json:"resourceSpecId"`
	ImageUrl string `json:"imageUrl"`
	CenterID []string `json:"centerID"`
	CenterName []string `json:"centerName"`
	ReplicaNum int `json:"replicaNum"`
	Datasets []GrampusDataset `json:"datasets"`
	Models []GrampusDataset `json:"models"`
	Code GrampusDataset `json:"code"`
	BootFile string `json:"bootFile"`
}
// GrampusNotebookTask describes a single notebook task. It is used both in
// CreateGrampusNotebookRequest.Tasks and in GrampusNotebookInfo.Tasks. The
// units of AutoStopDuration and Capacity are not established in this file.
type GrampusNotebookTask struct {
	AutoStopDuration int `json:"autoStopDuration"`
	Name string `json:"name"`
	Capacity int `json:"capacity"`
	CenterID []string `json:"centerID"`
	CenterName []string `json:"centerName"`
	Code GrampusDataset `json:"code"`
	Datasets []GrampusDataset `json:"datasets"`
	CodeUrl string `json:"codeUrl"`
	DataUrl string `json:"dataUrl"`
	ImageId string `json:"imageId"`
	ImageUrl string `json:"imageUrl"`
	ResourceSpecId string `json:"resourceSpecId"`
	Token string `json:"token"`
	Url string `json:"url"`
	Status string `json:"status"`
	Command string `json:"command"`
}
// GrampusDataset locates an object-store item (bucket, endpoint, object key)
// and the container path and read-only flag that go with it. The task types
// reuse it for datasets, models and code alike.
type GrampusDataset struct {
	Name string `json:"name"`
	Bucket string `json:"bucket"`
	EndPoint string `json:"endPoint"`
	ObjectKey string `json:"objectKey"`
	ContainerPath string `json:"containerPath"`
	ReadOnly bool `json:"readOnly"`
}
// CreateGrampusJobRequest is the request body for creating a Grampus job: a
// name and the tasks to run.
type CreateGrampusJobRequest struct {
	Name string `json:"name"`
	Tasks []GrampusTasks `json:"tasks"`
}
// CreateGrampusNotebookRequest is the request body for creating a Grampus
// notebook: a name and the notebook tasks to run.
type CreateGrampusNotebookRequest struct {
	Name string `json:"name"`
	Tasks []GrampusNotebookTask `json:"tasks"`
}
// GetTrainJobMetricStatisticResult is the JSON shape of a training-job metric
// query; it embeds TrainJobResult for the error envelope.
type GetTrainJobMetricStatisticResult struct {
	TrainJobResult
	Interval int `json:"interval"` // query interval, in minutes
	MetricsInfo []Metrics `json:"metrics"` // monitoring details
}
// Metrics is one monitored metric and its series of sampled values.
type Metrics struct {
	Metric string `json:"metric"` // monitored metric item
	Value []string `json:"value"` // sequence of retrieved monitoring values; elements are strings
}
  1480. func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
  1481. sess := x.NewSession()
  1482. defer sess.Close()
  1483. var cond = builder.NewCond()
  1484. if opts.RepoID > 0 {
  1485. cond = cond.And(
  1486. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  1487. )
  1488. }
  1489. if opts.UserID > 0 {
  1490. cond = cond.And(
  1491. builder.Eq{"cloudbrain.user_id": opts.UserID},
  1492. )
  1493. }
  1494. if (opts.JobID) != "" {
  1495. cond = cond.And(
  1496. builder.Eq{"cloudbrain.job_id": opts.JobID},
  1497. )
  1498. }
  1499. if (opts.ComputeResource) != "" {
  1500. cond = cond.And(
  1501. builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource},
  1502. )
  1503. }
  1504. if (opts.Type) >= 0 {
  1505. cond = cond.And(
  1506. builder.Eq{"cloudbrain.type": opts.Type},
  1507. )
  1508. }
  1509. if len(opts.JobTypes) > 0 {
  1510. if opts.JobTypeNot {
  1511. cond = cond.And(
  1512. builder.NotIn("cloudbrain.job_type", opts.JobTypes),
  1513. )
  1514. } else {
  1515. cond = cond.And(
  1516. builder.In("cloudbrain.job_type", opts.JobTypes),
  1517. )
  1518. }
  1519. }
  1520. if (opts.AiCenter) != "" {
  1521. if opts.AiCenter == AICenterOfCloudBrainOne {
  1522. cond = cond.And(
  1523. builder.Eq{"cloudbrain.type": TypeCloudBrainOne},
  1524. )
  1525. } else if opts.AiCenter == AICenterOfCloudBrainTwo {
  1526. cond = cond.And(
  1527. builder.Eq{"cloudbrain.type": TypeCloudBrainTwo},
  1528. )
  1529. } else if opts.AiCenter == AICenterOfChengdu {
  1530. cond = cond.And(
  1531. builder.Eq{"cloudbrain.type": TypeCDCenter},
  1532. )
  1533. } else {
  1534. cond = cond.And(
  1535. builder.Like{"cloudbrain.ai_center", opts.AiCenter},
  1536. )
  1537. }
  1538. }
  1539. if (opts.Cluster) != "" {
  1540. if opts.Cluster == "resource_cluster_openi" {
  1541. cond = cond.And(
  1542. builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}, builder.Eq{"cloudbrain.type": TypeCDCenter}),
  1543. )
  1544. }
  1545. if opts.Cluster == "resource_cluster_c2net" {
  1546. cond = cond.And(
  1547. builder.Eq{"cloudbrain.type": TypeC2Net},
  1548. )
  1549. }
  1550. }
  1551. if (opts.IsLatestVersion) != "" {
  1552. cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"}))
  1553. }
  1554. if len(opts.CloudbrainIDs) > 0 {
  1555. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  1556. }
  1557. if len(opts.JobStatus) > 0 {
  1558. if opts.JobStatusNot {
  1559. cond = cond.And(
  1560. builder.NotIn("cloudbrain.status", opts.JobStatus),
  1561. )
  1562. } else {
  1563. cond = cond.And(
  1564. builder.In("cloudbrain.status", opts.JobStatus),
  1565. )
  1566. }
  1567. }
  1568. if len(opts.RepoIDList) > 0 {
  1569. cond = cond.And(
  1570. builder.In("cloudbrain.repo_id", opts.RepoIDList),
  1571. )
  1572. }
  1573. var count int64
  1574. var err error
  1575. condition := "cloudbrain.user_id = `user`.id"
  1576. if len(opts.Keyword) == 0 {
  1577. count, err = sess.Where(cond).Count(new(Cloudbrain))
  1578. } else {
  1579. lowerKeyWord := strings.ToLower(opts.Keyword)
  1580. cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord}))
  1581. count, err = sess.Table(&Cloudbrain{}).Where(cond).
  1582. Join("left", "`user`", condition).Count(new(CloudbrainInfo))
  1583. }
  1584. if err != nil {
  1585. return nil, 0, fmt.Errorf("Count: %v", err)
  1586. }
  1587. if opts.Page >= 0 && opts.PageSize > 0 {
  1588. var start int
  1589. if opts.Page == 0 {
  1590. start = 0
  1591. } else {
  1592. start = (opts.Page - 1) * opts.PageSize
  1593. }
  1594. sess.Limit(opts.PageSize, start)
  1595. }
  1596. sess.OrderBy("cloudbrain.created_unix DESC")
  1597. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  1598. if err := sess.Table(&Cloudbrain{}).Where(cond).
  1599. Join("left", "`user`", condition).
  1600. Find(&cloudbrains); err != nil {
  1601. return nil, 0, fmt.Errorf("Find: %v", err)
  1602. }
  1603. if opts.NeedRepoInfo {
  1604. var ids []int64
  1605. for _, task := range cloudbrains {
  1606. ids = append(ids, task.RepoID)
  1607. }
  1608. repositoryMap, err := GetRepositoriesMapByIDs(ids)
  1609. if err == nil {
  1610. for _, task := range cloudbrains {
  1611. task.Repo = repositoryMap[task.RepoID]
  1612. }
  1613. }
  1614. }
  1615. return cloudbrains, count, nil
  1616. }
  1617. func QueryModelTrainJobVersionList(jobId string) ([]*Cloudbrain, int, error) {
  1618. sess := x.NewSession()
  1619. defer sess.Close()
  1620. var cond = builder.NewCond()
  1621. cond = cond.And(
  1622. builder.Eq{"cloudbrain.job_id": jobId},
  1623. )
  1624. cond = cond.And(
  1625. builder.In("cloudbrain.Status", "COMPLETED", "SUCCEEDED"),
  1626. //builder.Eq{"cloudbrain.Status": "COMPLETED"},
  1627. )
  1628. sess.OrderBy("cloudbrain.created_unix DESC")
  1629. cloudbrains := make([]*Cloudbrain, 0)
  1630. if err := sess.Table(&Cloudbrain{}).Where(cond).
  1631. Find(&cloudbrains); err != nil {
  1632. return nil, 0, fmt.Errorf("Find: %v", err)
  1633. }
  1634. return cloudbrains, int(len(cloudbrains)), nil
  1635. }
  1636. func QueryModelTrainJobList(repoId int64) ([]*Cloudbrain, int, error) {
  1637. sess := x.NewSession()
  1638. defer sess.Close()
  1639. var cond = builder.NewCond()
  1640. cond = cond.And(
  1641. builder.Eq{"repo_id": repoId},
  1642. )
  1643. cond = cond.And(
  1644. builder.In("Status", "COMPLETED", "SUCCEEDED"),
  1645. )
  1646. cond = cond.And(
  1647. builder.Eq{"job_type": "TRAIN"},
  1648. )
  1649. // cond = cond.And(
  1650. // builder.In("type", 0, 1),
  1651. // )
  1652. cloudbrains := make([]*Cloudbrain, 0)
  1653. if err := sess.Select("job_id,display_job_name").Table(&Cloudbrain{}).Where(cond).OrderBy("created_unix DESC").
  1654. Find(&cloudbrains); err != nil {
  1655. return nil, 0, fmt.Errorf("Find: %v", err)
  1656. }
  1657. keys := make(map[string]string)
  1658. uniqueElements := make([]*Cloudbrain, 0)
  1659. for _, entry := range cloudbrains {
  1660. if _, value := keys[entry.JobID]; !value {
  1661. keys[entry.JobID] = entry.DisplayJobName
  1662. uniqueElements = append(uniqueElements, entry)
  1663. }
  1664. }
  1665. return uniqueElements, int(len(uniqueElements)), nil
  1666. }
  1667. func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, error) {
  1668. sess := x.NewSession()
  1669. defer sess.Close()
  1670. var cond = builder.NewCond()
  1671. if opts.RepoID > 0 {
  1672. cond = cond.And(
  1673. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  1674. )
  1675. }
  1676. if opts.UserID > 0 {
  1677. cond = cond.And(
  1678. builder.Eq{"cloudbrain.user_id": opts.UserID},
  1679. )
  1680. }
  1681. if (opts.Type) >= 0 {
  1682. cond = cond.And(
  1683. builder.Eq{"cloudbrain.type": opts.Type},
  1684. )
  1685. }
  1686. if (opts.JobID) != "" {
  1687. cond = cond.And(
  1688. builder.Eq{"cloudbrain.job_id": opts.JobID},
  1689. )
  1690. }
  1691. if len(opts.JobTypes) > 0 {
  1692. cond = cond.And(
  1693. builder.In("cloudbrain.job_type", opts.JobTypes),
  1694. )
  1695. }
  1696. if len(opts.CloudbrainIDs) > 0 {
  1697. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  1698. }
  1699. count, err := sess.Where(cond).Count(new(Cloudbrain))
  1700. if err != nil {
  1701. return nil, 0, fmt.Errorf("Count: %v", err)
  1702. }
  1703. if opts.Page >= 0 && opts.PageSize > 0 {
  1704. var start int
  1705. if opts.Page == 0 {
  1706. start = 0
  1707. } else {
  1708. start = (opts.Page - 1) * opts.PageSize
  1709. }
  1710. sess.Limit(opts.PageSize, start)
  1711. }
  1712. sess.OrderBy("cloudbrain.created_unix DESC")
  1713. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  1714. if err := sess.Table(&Cloudbrain{}).Where(cond).
  1715. Join("left", "`user`", "cloudbrain.user_id = `user`.id").
  1716. Find(&cloudbrains); err != nil {
  1717. return nil, 0, fmt.Errorf("Find: %v", err)
  1718. }
  1719. return cloudbrains, int(count), nil
  1720. }
  1721. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  1722. session := x.NewSession()
  1723. defer session.Close()
  1724. err = session.Begin()
  1725. cloudbrain.TrainJobDuration = DURATION_STR_ZERO
  1726. if _, err = session.NoAutoTime().InsertOne(cloudbrain); err != nil {
  1727. session.Rollback()
  1728. return err
  1729. }
  1730. if cloudbrain.Spec != nil {
  1731. if _, err = session.Insert(NewCloudBrainSpec(cloudbrain.ID, *cloudbrain.Spec)); err != nil {
  1732. session.Rollback()
  1733. return err
  1734. }
  1735. }
  1736. session.Commit()
  1737. go IncreaseDatasetUseCount(cloudbrain.Uuid)
  1738. go OperateRepoAITaskNum(cloudbrain.RepoID, 1)
  1739. return nil
  1740. }
  1741. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  1742. has, err := x.Get(cb)
  1743. if err != nil {
  1744. return nil, err
  1745. } else if !has {
  1746. return nil, ErrJobNotExist{}
  1747. }
  1748. return cb, nil
  1749. }
  1750. func getRepoCloudBrainWithDeleted(cb *Cloudbrain) (*Cloudbrain, error) {
  1751. has, err := x.Unscoped().Get(cb)
  1752. if err != nil {
  1753. return nil, err
  1754. } else if !has {
  1755. return nil, ErrJobNotExist{}
  1756. }
  1757. return cb, nil
  1758. }
  1759. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  1760. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  1761. return getRepoCloudBrain(cb)
  1762. }
  1763. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  1764. cb := &Cloudbrain{JobID: jobID}
  1765. return getRepoCloudBrain(cb)
  1766. }
  1767. func GetCloudbrainByJobIDWithDeleted(jobID string) (*Cloudbrain, error) {
  1768. cb := &Cloudbrain{JobID: jobID}
  1769. return getRepoCloudBrainWithDeleted(cb)
  1770. }
  1771. func GetCloudbrainByID(id string) (*Cloudbrain, error) {
  1772. idInt64, _ := strconv.ParseInt(id, 10, 64)
  1773. cb := &Cloudbrain{ID: idInt64}
  1774. return getRepoCloudBrain(cb)
  1775. }
  1776. func IsCloudbrainExistByJobName(jobName string) (bool, error) {
  1777. return x.Unscoped().Exist(&Cloudbrain{
  1778. JobName: jobName,
  1779. })
  1780. }
  1781. func GetCloudbrainByIDWithDeleted(id string) (*Cloudbrain, error) {
  1782. idInt64, _ := strconv.ParseInt(id, 10, 64)
  1783. cb := &Cloudbrain{ID: idInt64}
  1784. return getRepoCloudBrainWithDeleted(cb)
  1785. }
  1786. func GetCloudbrainByJobIDAndVersionName(jobID string, versionName string) (*Cloudbrain, error) {
  1787. cb := &Cloudbrain{JobID: jobID, VersionName: versionName}
  1788. return getRepoCloudBrain(cb)
  1789. }
  1790. func GetCloudbrainByJobIDAndIsLatestVersion(jobID string, isLatestVersion string) (*Cloudbrain, error) {
  1791. cb := &Cloudbrain{JobID: jobID, IsLatestVersion: isLatestVersion}
  1792. return getRepoCloudBrain(cb)
  1793. }
  1794. func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) {
  1795. cloudBrains := make([]*Cloudbrain, 0)
  1796. err := x.Cols("job_id", "status", "type", "job_type", "version_id", "start_time").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains)
  1797. return cloudBrains, err
  1798. }
  1799. func GetModelartsReDebugTaskByJobId(jobID string) ([]*Cloudbrain, error) {
  1800. sess := x.NewSession()
  1801. defer sess.Close()
  1802. var cond = builder.NewCond()
  1803. cond = cond.And(
  1804. builder.Eq{"cloudbrain.job_id": jobID},
  1805. )
  1806. sess.OrderBy("cloudbrain.created_unix ASC limit 1")
  1807. cloudbrains := make([]*Cloudbrain, 0, 10)
  1808. if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond).
  1809. Find(&cloudbrains); err != nil {
  1810. log.Info("find error.")
  1811. }
  1812. return cloudbrains, nil
  1813. }
  1814. func GetCloudbrainsNeededStopByRepoID(repoID int64) ([]*Cloudbrain, error) {
  1815. cloudBrains := make([]*Cloudbrain, 0)
  1816. err := x.Cols("job_id", "status", "type", "job_type", "version_id", "start_time").Where("repo_id=? AND status !=?", repoID, string(JobStopped)).Find(&cloudBrains)
  1817. return cloudBrains, err
  1818. }
  1819. func GetCloudbrainsNeededDeleteByRepoID(repoID int64) ([]*Cloudbrain, error) {
  1820. cloudBrains := make([]*Cloudbrain, 0)
  1821. err := x.Where("repo_id=?", repoID).Find(&cloudBrains)
  1822. return cloudBrains, err
  1823. }
  1824. func GetCloudbrainsByDisplayJobName(repoID int64, jobType string, displayJobName string) ([]*Cloudbrain, error) {
  1825. cloudBrains := make([]*Cloudbrain, 0)
  1826. err := x.Cols("job_id", "job_name", "repo_id", "user_id", "job_type", "display_job_name").Where("repo_id=? AND job_type =? AND lower(display_job_name) = lower(?)", repoID, jobType, displayJobName).Find(&cloudBrains)
  1827. return cloudBrains, err
  1828. }
  1829. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  1830. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  1831. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  1832. return
  1833. }
  1834. func SetTrainJobStatusByJobID(jobID string, status string, duration int64, trainjobduration string) (err error) {
  1835. cb := &Cloudbrain{JobID: jobID, Status: string(status), Duration: duration, TrainJobDuration: trainjobduration}
  1836. _, err = x.Cols("status", "duration", "train_job_duration").Where("cloudbrain.job_id=?", jobID).Update(cb)
  1837. return
  1838. }
  1839. func SetVersionCountAndLatestVersion(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) {
  1840. cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount}
  1841. _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb)
  1842. return
  1843. }
  1844. func UpdateJob(job *Cloudbrain) error {
  1845. return updateJob(x, job)
  1846. }
  1847. func UpdateJobDurationWithDeleted(job *Cloudbrain) error {
  1848. _, err := x.Exec("update cloudbrain set start_time=?, end_time=?,train_job_duration=?,duration=? where id=?", job.StartTime, job.EndTime, job.TrainJobDuration, job.Duration, job.ID)
  1849. return err
  1850. }
  1851. func updateJob(e Engine, job *Cloudbrain) error {
  1852. _, err := e.ID(job.ID).AllCols().Update(job)
  1853. return err
  1854. }
  1855. func UpdateTrainJobVersion(job *Cloudbrain) error {
  1856. return updateJobTrainVersion(x, job)
  1857. }
  1858. func updateJobTrainVersion(e Engine, job *Cloudbrain) error {
  1859. var sess *xorm.Session
  1860. sess = e.Where("job_id = ? AND version_name=?", job.JobID, job.VersionName)
  1861. _, err := sess.Cols("status", "train_job_duration", "duration", "start_time", "end_time", "created_unix", "ai_center").Update(job)
  1862. return err
  1863. }
  1864. func DeleteJob(job *Cloudbrain) error {
  1865. return deleteJob(x, job)
  1866. }
  1867. func deleteJob(e Engine, job *Cloudbrain) error {
  1868. _, err := e.ID(job.ID).Delete(job)
  1869. if err == nil {
  1870. go updateAITaskNumWhenDeleteJob(job)
  1871. }
  1872. return err
  1873. }
  1874. func updateAITaskNumWhenDeleteJob(job *Cloudbrain) {
  1875. repoId := job.RepoID
  1876. if repoId == 0 {
  1877. t := &Cloudbrain{}
  1878. _, tempErr := x.ID(job.ID).Unscoped().Get(t)
  1879. if tempErr != nil {
  1880. log.Error("updateAITaskNumWhenDeleteJob error.%v", tempErr)
  1881. return
  1882. }
  1883. repoId = t.RepoID
  1884. }
  1885. if repoId > 0 {
  1886. go OperateRepoAITaskNum(repoId, -1)
  1887. }
  1888. }
  1889. func GetCloudbrainByName(jobName string) (*Cloudbrain, error) {
  1890. cb := &Cloudbrain{JobName: jobName}
  1891. return getRepoCloudBrain(cb)
  1892. }
  1893. func GetWaitOrRunFileNotebookByRepo(repoId int64, cloudbrainType int) (*Cloudbrain, error) {
  1894. cloudBrain := new(Cloudbrain)
  1895. has, err := x.In("status", JobWaiting, JobRunning, ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting,
  1896. ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsDeleting, ModelArtsRestarting).Where("repo_id=? and type=? and boot_file!=''", repoId, cloudbrainType).Get(cloudBrain)
  1897. if has {
  1898. return cloudBrain, err
  1899. }
  1900. return nil, err
  1901. }
  1902. func CanDelJob(isSigned bool, user *User, job *CloudbrainInfo) bool {
  1903. if !isSigned || (job.Status != string(JobStopped) && job.Status != string(JobFailed) && job.Status != string(ModelArtsStartFailed) && job.Status != string(ModelArtsCreateFailed)) {
  1904. return false
  1905. }
  1906. repo, err := GetRepositoryByID(job.RepoID)
  1907. if err != nil {
  1908. log.Error("GetRepositoryByID failed:%v", err.Error())
  1909. return false
  1910. }
  1911. permission, _ := GetUserRepoPermission(repo, user)
  1912. if err != nil {
  1913. log.Error("GetUserRepoPermission failed:%v", err.Error())
  1914. return false
  1915. }
  1916. if (user.ID == job.UserID && permission.AccessMode >= AccessModeWrite) || user.IsAdmin || permission.AccessMode >= AccessModeAdmin {
  1917. return true
  1918. }
  1919. return false
  1920. }
  1921. func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) {
  1922. cloudbrains := make([]*Cloudbrain, 0, 10)
  1923. return cloudbrains, x.
  1924. NotIn("status",
  1925. JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted,
  1926. ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed,
  1927. ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed).
  1928. Limit(100).
  1929. Find(&cloudbrains)
  1930. }
  1931. func GetGPUStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) {
  1932. cloudbrains := make([]*Cloudbrain, 0, 10)
  1933. endTimeBefore := time.Now().Unix() - int64(days)*24*3600
  1934. missEndTimeBefore := endTimeBefore - 24*3600
  1935. return cloudbrains, x.Unscoped().Cols("id,job_name,job_id").
  1936. In("status",
  1937. JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted,
  1938. ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed,
  1939. ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed).
  1940. Where("(((end_time is null or end_time=0) and updated_unix<? and updated_unix != 0 ) or (end_time<? and end_time != 0)) and cleared=false and (type=0 or (type =2 and compute_resource='CPU/GPU')) and job_type != 'DEBUG'", missEndTimeBefore, endTimeBefore).
  1941. Limit(limit).
  1942. Find(&cloudbrains)
  1943. }
  1944. /**
  1945. 本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间
  1946. */
  1947. func GetGPUStoppedDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) {
  1948. cloudbrains := make([]*Cloudbrain, 0, 10)
  1949. endTimeBefore := time.Now().Unix() - int64(days)*24*3600
  1950. missEndTimeBefore := endTimeBefore - 24*3600
  1951. sql := `SELECT id,job_name,job_id from (SELECT DISTINCT ON (job_name)
  1952. id, job_name, job_id,status,end_time,updated_unix,cleared
  1953. FROM cloudbrain
  1954. where (type=0 or (type =2 and compute_resource='CPU/GPU')) and job_type='DEBUG'
  1955. ORDER BY job_name, updated_unix DESC) a
  1956. where status in ('STOPPED','SUCCEEDED','FAILED') and (((end_time is null or end_time=0) and updated_unix<? and updated_unix != 0 ) or (end_time<? and end_time != 0)) and cleared=false`
  1957. return cloudbrains, x.Unscoped().SQL(sql, missEndTimeBefore, endTimeBefore).Limit(limit).Find(&cloudbrains)
  1958. }
  1959. func UpdateCloudBrainRecordsCleared(ids []int64) error {
  1960. pageSize := 150
  1961. n := len(ids) / pageSize
  1962. var err error
  1963. for i := 1; i <= n+1; i++ {
  1964. tempIds := getPageIds(ids, i, pageSize)
  1965. if len(tempIds) > 0 {
  1966. idsIn := ""
  1967. for i, id := range tempIds {
  1968. if i == 0 {
  1969. idsIn += strconv.FormatInt(id, 10)
  1970. } else {
  1971. idsIn += "," + strconv.FormatInt(id, 10)
  1972. }
  1973. }
  1974. _, errTemp := x.Unscoped().Exec("update cloudbrain set cleared=true where id in (" + idsIn + ")")
  1975. if errTemp != nil {
  1976. err = errTemp
  1977. }
  1978. }
  1979. }
  1980. return err
  1981. }
  1982. func getPageIds(ids []int64, page int, pagesize int) []int64 {
  1983. begin := (page - 1) * pagesize
  1984. end := (page) * pagesize
  1985. if begin > len(ids)-1 {
  1986. return []int64{}
  1987. }
  1988. if end > len(ids)-1 {
  1989. return ids[begin:]
  1990. } else {
  1991. return ids[begin:end]
  1992. }
  1993. }
  1994. func GetStoppedJobWithNoDurationJob() ([]*Cloudbrain, error) {
  1995. cloudbrains := make([]*Cloudbrain, 0)
  1996. return cloudbrains, x.
  1997. In("status", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded).
  1998. Where("train_job_duration is null or train_job_duration = '' ").
  1999. Limit(100).
  2000. Find(&cloudbrains)
  2001. }
  2002. func GetStoppedJobWithNoStartTimeEndTime() ([]*Cloudbrain, error) {
  2003. cloudbrains := make([]*Cloudbrain, 0)
  2004. return cloudbrains, x.SQL("select * from cloudbrain where status in (?,?,?,?,?,?,?) and (start_time is null or end_time is null) limit 100", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded).Find(&cloudbrains)
  2005. }
  2006. func GetC2NetWithAiCenterWrongJob() ([]*Cloudbrain, error) {
  2007. cloudbrains := make([]*Cloudbrain, 0)
  2008. return cloudbrains, x.
  2009. In("status", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded).
  2010. Where("type = ?", TypeC2Net).
  2011. Find(&cloudbrains)
  2012. }
  2013. func GetModelSafetyTestTask() ([]*Cloudbrain, error) {
  2014. cloudbrains := make([]*Cloudbrain, 0)
  2015. sess := x.Where("job_type=?", string(JobTypeModelSafety))
  2016. err := sess.Find(&cloudbrains)
  2017. return cloudbrains, err
  2018. }
  2019. func GetCloudbrainRunCountByRepoID(repoID int64) (int, error) {
  2020. count, err := x.In("status", JobWaiting, JobRunning, ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting,
  2021. ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsDeleting, ModelArtsRestarting, ModelArtsTrainJobInit,
  2022. ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsStopping, ModelArtsResizing,
  2023. ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobKilling, ModelArtsTrainJobCheckRunningCompleted).And("repo_id = ?", repoID).Count(new(Cloudbrain))
  2024. return int(count), err
  2025. }
  2026. func GetModelSafetyCountByUserID(userID int64) (int, error) {
  2027. count, err := x.In("status", JobWaiting, JobRunning, ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted).And("job_type = ? and user_id = ?", string(JobTypeModelSafety), userID).Count(new(Cloudbrain))
  2028. return int(count), err
  2029. }
  2030. func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTypes ...JobType) (int64, error) {
  2031. sess := x.Where("status=? and type=?", JobWaiting, cloudbrainType)
  2032. if len(jobTypes) > 0 {
  2033. sess.In("job_type", jobTypes)
  2034. }
  2035. if computeResource != "" {
  2036. sess.And("compute_resource=?", computeResource)
  2037. }
  2038. return sess.Count(new(Cloudbrain))
  2039. }
  2040. func GetNotFinalStatusTaskCount(userID int64, notFinalStatus []string, jobTypes []JobType) (int, error) {
  2041. count, err := x.In("status", notFinalStatus).
  2042. In("job_type", jobTypes).
  2043. And("user_id = ? ", userID).Count(new(Cloudbrain))
  2044. return int(count), err
  2045. }
  2046. func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) {
  2047. sess := x.NewSession()
  2048. defer sess.Close()
  2049. if err = sess.Begin(); err != nil {
  2050. return err
  2051. }
  2052. if _, err = sess.Delete(old); err != nil {
  2053. sess.Rollback()
  2054. return err
  2055. }
  2056. if _, err = sess.NoAutoTime().InsertOne(new); err != nil {
  2057. sess.Rollback()
  2058. return err
  2059. }
  2060. if new.Spec != nil {
  2061. if _, err = sess.Insert(NewCloudBrainSpec(new.ID, *new.Spec)); err != nil {
  2062. sess.Rollback()
  2063. return err
  2064. }
  2065. }
  2066. if err = sess.Commit(); err != nil {
  2067. return err
  2068. }
  2069. go IncreaseDatasetUseCount(new.Uuid)
  2070. return nil
  2071. }
  2072. func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
  2073. sess := x.NewSession()
  2074. defer sess.Close()
  2075. var cond = builder.NewCond()
  2076. if opts.RepoID > 0 {
  2077. cond = cond.And(
  2078. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  2079. )
  2080. }
  2081. if opts.UserID > 0 {
  2082. cond = cond.And(
  2083. builder.Eq{"cloudbrain.user_id": opts.UserID},
  2084. )
  2085. }
  2086. if (opts.JobID) != "" {
  2087. cond = cond.And(
  2088. builder.Eq{"cloudbrain.job_id": opts.JobID},
  2089. )
  2090. }
  2091. if (opts.ComputeResource) != "" {
  2092. cond = cond.And(
  2093. builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource},
  2094. )
  2095. }
  2096. if (opts.Type) >= 0 {
  2097. cond = cond.And(
  2098. builder.Eq{"cloudbrain.type": opts.Type},
  2099. )
  2100. }
  2101. if len(opts.JobTypes) > 0 {
  2102. if opts.JobTypeNot {
  2103. cond = cond.And(
  2104. builder.NotIn("cloudbrain.job_type", opts.JobTypes),
  2105. )
  2106. } else {
  2107. cond = cond.And(
  2108. builder.In("cloudbrain.job_type", opts.JobTypes),
  2109. )
  2110. }
  2111. }
  2112. if (opts.AiCenter) != "" {
  2113. cond = cond.And(
  2114. builder.Like{"cloudbrain.ai_center", opts.AiCenter},
  2115. )
  2116. }
  2117. if (opts.NeedDeleteInfo) != "" {
  2118. if opts.NeedDeleteInfo == "yes" {
  2119. cond = cond.And(
  2120. builder.And(builder.NotNull{"cloudbrain.deleted_at"}),
  2121. )
  2122. }
  2123. if opts.NeedDeleteInfo == "no" {
  2124. cond = cond.And(
  2125. builder.And(builder.IsNull{"cloudbrain.deleted_at"}),
  2126. )
  2127. }
  2128. }
  2129. if (opts.IsLatestVersion) != "" {
  2130. cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion},
  2131. builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"}))
  2132. }
  2133. if len(opts.CloudbrainIDs) > 0 {
  2134. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  2135. }
  2136. if len(opts.JobStatus) > 0 {
  2137. if opts.JobStatusNot {
  2138. cond = cond.And(
  2139. builder.NotIn("cloudbrain.status", opts.JobStatus),
  2140. )
  2141. } else {
  2142. cond = cond.And(
  2143. builder.In("cloudbrain.status", opts.JobStatus),
  2144. )
  2145. }
  2146. }
  2147. if len(opts.RepoIDList) > 0 {
  2148. cond = cond.And(
  2149. builder.In("cloudbrain.repo_id", opts.RepoIDList),
  2150. )
  2151. }
  2152. if opts.BeginTimeUnix > 0 && opts.EndTimeUnix > 0 {
  2153. cond = cond.And(
  2154. builder.And(builder.Gte{"cloudbrain.created_unix": opts.BeginTimeUnix}, builder.Lte{"cloudbrain.created_unix": opts.EndTimeUnix}),
  2155. )
  2156. }
  2157. if opts.WorkServerNumber > 0 {
  2158. if opts.WorkServerNumber == 1 {
  2159. cond = cond.And(builder.Or(
  2160. builder.Eq{"cloudbrain.work_server_number": 0},
  2161. builder.Eq{"cloudbrain.work_server_number": 1},
  2162. builder.IsNull{"cloudbrain.work_server_number"},
  2163. ))
  2164. } else {
  2165. cond = cond.And(
  2166. builder.Eq{"cloudbrain.work_server_number": opts.WorkServerNumber},
  2167. )
  2168. }
  2169. }
  2170. if opts.AccCardType != "" {
  2171. cond = cond.And(builder.Eq{"cloudbrain_spec.acc_card_type": opts.AccCardType})
  2172. }
  2173. if opts.AccCardsNum >= 0 {
  2174. cond = cond.And(builder.Eq{"cloudbrain_spec.acc_cards_num": opts.AccCardsNum})
  2175. }
  2176. var count int64
  2177. var err error
  2178. condition := "cloudbrain.user_id = `user`.id"
  2179. if len(opts.Keyword) == 0 {
  2180. count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond).
  2181. Join("left", "`user`", condition).
  2182. Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id").
  2183. Count(new(CloudbrainInfo))
  2184. } else {
  2185. lowerKeyWord := strings.ToLower(opts.Keyword)
  2186. cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord},
  2187. builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord}))
  2188. count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond).
  2189. Join("left", "`user`", condition).
  2190. Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id").
  2191. Count(new(CloudbrainInfo))
  2192. }
  2193. if err != nil {
  2194. return nil, 0, fmt.Errorf("Count: %v", err)
  2195. }
  2196. if opts.Page >= 0 && opts.PageSize > 0 {
  2197. var start int
  2198. if opts.Page == 0 {
  2199. start = 0
  2200. } else {
  2201. start = (opts.Page - 1) * opts.PageSize
  2202. }
  2203. sess.Limit(opts.PageSize, start)
  2204. }
  2205. sess.OrderBy("cloudbrain.created_unix DESC")
  2206. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  2207. if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond).
  2208. Join("left", "`user`", condition).
  2209. Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id").
  2210. Find(&cloudbrains); err != nil {
  2211. return nil, 0, fmt.Errorf("Find: %v", err)
  2212. }
  2213. if opts.NeedRepoInfo {
  2214. var ids []int64
  2215. for _, task := range cloudbrains {
  2216. ids = append(ids, task.RepoID)
  2217. }
  2218. repositoryMap, err := GetRepositoriesMapByIDs(ids)
  2219. if err == nil {
  2220. for _, task := range cloudbrains {
  2221. task.Repo = repositoryMap[task.RepoID]
  2222. }
  2223. }
  2224. }
  2225. return cloudbrains, count, nil
  2226. }
  2227. func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
  2228. sess := x.NewSession()
  2229. defer sess.Close()
  2230. var cond = builder.NewCond()
  2231. if (opts.Type) >= 0 {
  2232. cond = cond.And(
  2233. builder.Eq{"cloudbrain.type": opts.Type},
  2234. )
  2235. }
  2236. if opts.BeginTimeUnix > 0 && opts.EndTimeUnix > 0 {
  2237. cond = cond.And(
  2238. builder.And(builder.Gte{"cloudbrain.created_unix": opts.BeginTimeUnix}, builder.Lte{"cloudbrain.created_unix": opts.EndTimeUnix}),
  2239. )
  2240. }
  2241. var count int64
  2242. var err error
  2243. count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain))
  2244. if err != nil {
  2245. return nil, 0, fmt.Errorf("Count: %v", err)
  2246. }
  2247. if opts.Page >= 0 && opts.PageSize > 0 {
  2248. var start int
  2249. if opts.Page == 0 {
  2250. start = 0
  2251. } else {
  2252. start = (opts.Page - 1) * opts.PageSize
  2253. }
  2254. sess.Limit(opts.PageSize, start)
  2255. }
  2256. // sess.OrderBy("cloudbrain.created_unix DESC")
  2257. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  2258. if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond).
  2259. Find(&cloudbrains); err != nil {
  2260. return nil, 0, fmt.Errorf("Find: %v", err)
  2261. }
  2262. if opts.NeedRepoInfo {
  2263. var ids []int64
  2264. for _, task := range cloudbrains {
  2265. ids = append(ids, task.RepoID)
  2266. }
  2267. repositoryMap, err := GetRepositoriesMapByIDs(ids)
  2268. if err == nil {
  2269. for _, task := range cloudbrains {
  2270. task.Repo = repositoryMap[task.RepoID]
  2271. }
  2272. }
  2273. }
  2274. return cloudbrains, count, nil
  2275. }
  2276. func CloudbrainAllKanBan(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
  2277. sess := x.NewSession()
  2278. defer sess.Close()
  2279. var cond = builder.NewCond()
  2280. if (opts.Type) >= 0 {
  2281. cond = cond.And(
  2282. builder.Eq{"cloudbrain.type": opts.Type},
  2283. )
  2284. }
  2285. if opts.BeginTimeUnix > 0 && opts.EndTimeUnix > 0 {
  2286. cond = cond.And(
  2287. builder.And(builder.Gte{"cloudbrain.created_unix": opts.BeginTimeUnix}, builder.Lte{"cloudbrain.created_unix": opts.EndTimeUnix}),
  2288. )
  2289. }
  2290. var count int64
  2291. var err error
  2292. count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain))
  2293. if err != nil {
  2294. return nil, 0, fmt.Errorf("Count: %v", err)
  2295. }
  2296. if opts.Page >= 0 && opts.PageSize > 0 {
  2297. var start int
  2298. if opts.Page == 0 {
  2299. start = 0
  2300. } else {
  2301. start = (opts.Page - 1) * opts.PageSize
  2302. }
  2303. sess.Limit(opts.PageSize, start)
  2304. }
  2305. // sess.OrderBy("cloudbrain.created_unix DESC")
  2306. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  2307. if err := sess.Cols("id", "type", "work_server_number", "duration", "train_job_duration", "ai_center", "cluster").Table(&Cloudbrain{}).Unscoped().Where(cond).
  2308. Find(&cloudbrains); err != nil {
  2309. return nil, 0, fmt.Errorf("Find: %v", err)
  2310. }
  2311. if opts.NeedRepoInfo {
  2312. var ids []int64
  2313. for _, task := range cloudbrains {
  2314. ids = append(ids, task.RepoID)
  2315. }
  2316. repositoryMap, err := GetRepositoriesMapByIDs(ids)
  2317. if err == nil {
  2318. for _, task := range cloudbrains {
  2319. task.Repo = repositoryMap[task.RepoID]
  2320. }
  2321. }
  2322. }
  2323. return cloudbrains, count, nil
  2324. }
  2325. func GetStartedCloudbrainTaskByUpdatedUnix(startTime, endTime time.Time) ([]Cloudbrain, error) {
  2326. r := make([]Cloudbrain, 0)
  2327. err := x.Where("updated_unix >= ? and updated_unix <= ? and start_time > 0", startTime.Unix(), endTime.Unix()).Unscoped().Find(&r)
  2328. if err != nil {
  2329. return nil, err
  2330. }
  2331. return r, nil
  2332. }
  2333. func GetCloudbrainByIds(ids []int64) ([]*Cloudbrain, error) {
  2334. if len(ids) == 0 {
  2335. return nil, nil
  2336. }
  2337. cloudbrains := make([]*Cloudbrain, 0)
  2338. err := x.In("id", ids).Unscoped().Find(&cloudbrains)
  2339. if err != nil {
  2340. return nil, err
  2341. }
  2342. return cloudbrains, nil
  2343. }
  2344. type DatasetInfo struct {
  2345. DataLocalPath string
  2346. Name string
  2347. FullName string
  2348. Size int
  2349. Type int
  2350. }
  2351. func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) {
  2352. var datasetNames string
  2353. uuids := strings.Split(uuidStr, ";")
  2354. if len(uuids) > setting.MaxDatasetNum {
  2355. log.Error("the dataset count(%d) exceed the limit", len(uuids))
  2356. return nil, datasetNames, errors.New("the dataset count exceed the limit")
  2357. }
  2358. datasetInfos := make(map[string]DatasetInfo)
  2359. attachs, err := GetAttachmentsByUUIDs(uuids)
  2360. if err != nil {
  2361. log.Error("GetAttachmentsByUUIDs failed: %v", err)
  2362. return nil, datasetNames, err
  2363. }
  2364. for i, tmpUuid := range uuids {
  2365. var attach *Attachment
  2366. for _, tmpAttach := range attachs {
  2367. if tmpAttach.UUID == tmpUuid {
  2368. attach = tmpAttach
  2369. break
  2370. }
  2371. }
  2372. if attach == nil {
  2373. log.Error("GetAttachmentsByUUIDs failed: %v", err)
  2374. return nil, datasetNames, err
  2375. }
  2376. fileName := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(attach.Name, ".zip"), ".tar.gz"), ".tgz")
  2377. for _, datasetInfo := range datasetInfos {
  2378. if fileName == datasetInfo.Name {
  2379. log.Error("the dataset name is same: %v", attach.Name)
  2380. return nil, datasetNames, errors.New("the dataset name is same")
  2381. }
  2382. }
  2383. var dataLocalPath string
  2384. if len(grampusType) > 0 {
  2385. if grampusType[0] == GPU {
  2386. dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID
  2387. } else if grampusType[0] == NPU {
  2388. dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/"
  2389. } else if grampusType[0] == GCU {
  2390. if attach.Type == TypeCloudBrainOne {
  2391. dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID
  2392. } else {
  2393. dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/"
  2394. }
  2395. }
  2396. } else {
  2397. dataLocalPath = setting.Attachment.Minio.RealPath +
  2398. setting.Attachment.Minio.Bucket + "/" +
  2399. setting.Attachment.Minio.BasePath +
  2400. AttachmentRelativePath(attach.UUID) +
  2401. attach.UUID
  2402. }
  2403. datasetInfos[attach.UUID] = DatasetInfo{
  2404. DataLocalPath: dataLocalPath,
  2405. Name: fileName,
  2406. FullName: attach.Name,
  2407. Size: int(attach.Size),
  2408. Type: attach.Type,
  2409. }
  2410. if i == 0 {
  2411. datasetNames = attach.Name
  2412. } else {
  2413. datasetNames += ";" + attach.Name
  2414. }
  2415. }
  2416. return datasetInfos, datasetNames, nil
  2417. }
  2418. var (
  2419. SpecsMapInitFlag = false
  2420. CloudbrainDebugResourceSpecsMap map[int]*ResourceSpec
  2421. CloudbrainTrainResourceSpecsMap map[int]*ResourceSpec
  2422. CloudbrainInferenceResourceSpecsMap map[int]*ResourceSpec
  2423. CloudbrainBenchmarkResourceSpecsMap map[int]*ResourceSpec
  2424. CloudbrainSpecialResourceSpecsMap map[int]*ResourceSpec
  2425. GpuInfosMapInitFlag = false
  2426. CloudbrainDebugGpuInfosMap map[string]*GpuInfo
  2427. CloudbrainTrainGpuInfosMap map[string]*GpuInfo
  2428. CloudbrainInferenceGpuInfosMap map[string]*GpuInfo
  2429. CloudbrainBenchmarkGpuInfosMap map[string]*GpuInfo
  2430. CloudbrainSpecialGpuInfosMap map[string]*GpuInfo
  2431. )
  2432. func GetNewestJobsByAiCenter() ([]int64, error) {
  2433. ids := make([]int64, 0)
  2434. return ids, x.
  2435. Select("max(id) as id").
  2436. Where("type=? and ai_center!='' and ai_center is not null", TypeC2Net).
  2437. GroupBy("ai_center").
  2438. Table(Cloudbrain{}).
  2439. Find(&ids)
  2440. }
  2441. func GetNewestJobsByType() ([]int64, error) {
  2442. ids := make([]int64, 0)
  2443. return ids, x.
  2444. Select("max(id) as id").
  2445. In("type", TypeCloudBrainOne, TypeCloudBrainTwo).
  2446. GroupBy("type").
  2447. Table(Cloudbrain{}).
  2448. Find(&ids)
  2449. }
  2450. func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) {
  2451. cloudbrains := make([]*Cloudbrain, 0)
  2452. return cloudbrains, x.
  2453. In("id", ids).
  2454. Find(&cloudbrains)
  2455. }
  2456. func GetCloudbrainWithDeletedByIDs(ids []int64) ([]*Cloudbrain, error) {
  2457. cloudbrains := make([]*Cloudbrain, 0)
  2458. return cloudbrains, x.
  2459. In("id", ids).Unscoped().Find(&cloudbrains)
  2460. }
  2461. func GetCloudbrainCountByJobName(jobName, jobType string, typeCloudbrain int) (int, error) {
  2462. count, err := x.Where("job_name = ? and job_type= ? and type = ?", jobName, jobType, typeCloudbrain).Count(new(Cloudbrain))
  2463. return int(count), err
  2464. }
  2465. func LoadSpecs(tasks []*Cloudbrain) error {
  2466. cloudbrainIds := make([]int64, len(tasks))
  2467. for i, v := range tasks {
  2468. cloudbrainIds[i] = v.ID
  2469. }
  2470. specs := make([]*CloudbrainSpec, 0)
  2471. err := x.In("cloudbrain_id", cloudbrainIds).Find(&specs)
  2472. if err != nil {
  2473. return err
  2474. }
  2475. specMap := make(map[int64]*CloudbrainSpec)
  2476. for _, v := range specs {
  2477. specMap[v.SpecId] = v
  2478. }
  2479. for _, v := range tasks {
  2480. if specMap[v.ID] != nil {
  2481. v.Spec = specMap[v.ID].ConvertToSpecification()
  2482. }
  2483. }
  2484. return nil
  2485. }
  2486. func LoadSpecs4CloudbrainInfo(tasks []*CloudbrainInfo) error {
  2487. cloudbrainIds := make([]int64, len(tasks))
  2488. for i, v := range tasks {
  2489. cloudbrainIds[i] = v.Cloudbrain.ID
  2490. }
  2491. specs := make([]*CloudbrainSpec, 0)
  2492. err := x.In("cloudbrain_id", cloudbrainIds).Find(&specs)
  2493. if err != nil {
  2494. return err
  2495. }
  2496. specMap := make(map[int64]*CloudbrainSpec)
  2497. for _, v := range specs {
  2498. specMap[v.CloudbrainID] = v
  2499. }
  2500. for _, v := range tasks {
  2501. if specMap[v.Cloudbrain.ID] != nil {
  2502. v.Cloudbrain.Spec = specMap[v.Cloudbrain.ID].ConvertToSpecification()
  2503. }
  2504. }
  2505. return nil
  2506. }