You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 49 kB

3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461
  1. package models
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "time"
  7. "xorm.io/builder"
  8. "xorm.io/xorm"
  9. "code.gitea.io/gitea/modules/log"
  10. "code.gitea.io/gitea/modules/setting"
  11. "code.gitea.io/gitea/modules/timeutil"
  12. )
  // String-backed enum types used by the constants below.
  type (
  	// CloudbrainStatus is the state of a job; see the Job* status constants.
  	CloudbrainStatus string
  	// JobType is the kind of a job; see the JobType* constants.
  	JobType string
  	// ModelArtsJobStatus is the state of a ModelArts notebook or train job.
  	ModelArtsJobStatus string
  )

  // Untyped string constants describing resources, storage and notebook features.
  const (
  	NPUResource = "NPU"
  	GPUResource = "CPU/GPU"

  	// Notebook storage categories.
  	EVSCategory = "EVS"
  	EFSCategory = "EFS"

  	ManagedOwnership = "MANAGED"
  	// NOTE(review): the name says "Detected" while the value is "DEDICATED".
  	DetectedOwnership = "DEDICATED"

  	NotebookFeature = "NOTEBOOK"
  	DefaultFeature = "DEFAULT"
  )

  // Job states.
  const (
  	JobWaiting CloudbrainStatus = "WAITING"
  	JobStopped CloudbrainStatus = "STOPPED"
  	JobSucceeded CloudbrainStatus = "SUCCEEDED"
  	JobFailed CloudbrainStatus = "FAILED"
  	JobRunning CloudbrainStatus = "RUNNING"
  )

  // Job kinds.
  const (
  	JobTypeDebug JobType = "DEBUG"
  	JobTypeBenchmark JobType = "BENCHMARK"
  	JobTypeSnn4imagenet JobType = "SNN4IMAGENET"
  	JobTypeBrainScore JobType = "BRAINSCORE"
  	JobTypeTrain JobType = "TRAIN"
  	JobTypeInference JobType = "INFERENCE"
  )

  // ModelArts notebook states.
  const (
  	ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" // free resource: queuing for creation
  	ModelArtsCreating ModelArtsJobStatus = "CREATING" // being created
  	ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED" // creation failed
  	ModelArtsStartQueuing ModelArtsJobStatus = "START_QUEUING" // free resource: queuing for start
  	ModelArtsReadyToStart ModelArtsJobStatus = "READY_TO_START" // free resource: waiting to start
  	ModelArtsStarting ModelArtsJobStatus = "STARTING" // starting
  	ModelArtsRestarting ModelArtsJobStatus = "RESTARTING" // restarting
  	ModelArtsStartFailed ModelArtsJobStatus = "START_FAILED" // start failed
  	ModelArtsRunning ModelArtsJobStatus = "RUNNING" // running
  	ModelArtsStopping ModelArtsJobStatus = "STOPPING" // stopping
  	ModelArtsStopped ModelArtsJobStatus = "STOPPED" // stopped
  	ModelArtsUnavailable ModelArtsJobStatus = "UNAVAILABLE" // faulty
  	ModelArtsDeleted ModelArtsJobStatus = "DELETED" // deleted
  	ModelArtsResizing ModelArtsJobStatus = "RESIZING" // flavor change in progress
  	ModelArtsResizFailed ModelArtsJobStatus = "RESIZE_FAILED" // flavor change failed
  )

  // ModelArts train job states.
  const (
  	ModelArtsTrainJobUnknown ModelArtsJobStatus = "UNKNOWN" // job status unknown
  	ModelArtsTrainJobInit ModelArtsJobStatus = "INIT" // job initializing
  	ModelArtsTrainJobImageCreating ModelArtsJobStatus = "IMAGE_CREATING" // job image is being created
  	ModelArtsTrainJobImageFailed ModelArtsJobStatus = "IMAGE_FAILED" // job image creation failed
  	ModelArtsTrainJobSubmitTrying ModelArtsJobStatus = "SUBMIT_TRYING" // job is being submitted
  	ModelArtsTrainJobSubmitFailed ModelArtsJobStatus = "SUBMIT_FAILED" // job submission failed
  	ModelArtsTrainJobDeleteFailed ModelArtsJobStatus = "DELETE_FAILED" // job deletion failed
  	ModelArtsTrainJobWaiting ModelArtsJobStatus = "WAITING" // job is queuing
  	ModelArtsTrainJobRunning ModelArtsJobStatus = "RUNNING" // job is running
  	ModelArtsTrainJobKilling ModelArtsJobStatus = "KILLING" // job is being cancelled
  	ModelArtsTrainJobCompleted ModelArtsJobStatus = "COMPLETED" // job completed
  	ModelArtsTrainJobFailed ModelArtsJobStatus = "FAILED" // job run failed
  	ModelArtsTrainJobKilled ModelArtsJobStatus = "KILLED" // job cancelled successfully
  	ModelArtsTrainJobCanceled ModelArtsJobStatus = "CANCELED" // job cancelled
  	ModelArtsTrainJobLost ModelArtsJobStatus = "LOST" // job lost
  	ModelArtsTrainJobScaling ModelArtsJobStatus = "SCALING" // job is scaling out
  	ModelArtsTrainJobSubmitModelFailed ModelArtsJobStatus = "SUBMIT_MODEL_FAILED" // model submission failed
  	ModelArtsTrainJobDeployServiceFailed ModelArtsJobStatus = "DEPLOY_SERVICE_FAILED" // service deployment failed
  	ModelArtsTrainJobCheckInit ModelArtsJobStatus = "CHECK_INIT" // review job initializing
  	ModelArtsTrainJobCheckRunning ModelArtsJobStatus = "CHECK_RUNNING" // review job is running
  	ModelArtsTrainJobCheckRunningCompleted ModelArtsJobStatus = "CHECK_RUNNING_COMPLETED" // review job completed
  	ModelArtsTrainJobCheckFailed ModelArtsJobStatus = "CHECK_FAILED" // review job failed
  )
  77. type Cloudbrain struct {
  78. ID int64 `xorm:"pk autoincr"`
  79. JobID string `xorm:"INDEX NOT NULL"`
  80. JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
  81. JobName string
  82. Status string
  83. UserID int64
  84. RepoID int64
  85. SubTaskName string
  86. ContainerID string
  87. ContainerIp string
  88. CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
  89. UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  90. Duration int64
  91. TrainJobDuration string
  92. Image string //GPU镜像名称
  93. GpuQueue string //GPU类型即GPU队列
  94. ResourceSpecId int //GPU规格id
  95. DeletedAt time.Time `xorm:"deleted"`
  96. CanDebug bool `xorm:"-"`
  97. CanDel bool `xorm:"-"`
  98. CanModify bool `xorm:"-"`
  99. Type int
  100. BenchmarkTypeID int
  101. BenchmarkChildTypeID int
  102. VersionID int64 //版本id
  103. VersionName string `xorm:"INDEX"` //当前版本
  104. Uuid string //数据集id
  105. DatasetName string
  106. VersionCount int //任务的当前版本数量,不包括删除的
  107. IsLatestVersion string //是否是最新版本,1是,0否
  108. CommitID string //提交的仓库代码id
  109. PreVersionName string //父版本名称
  110. ComputeResource string //计算资源,例如npu
  111. EngineID int64 //引擎id
  112. TrainUrl string //输出模型的obs路径
  113. BranchName string //分支名称
  114. Parameters string //传给modelarts的param参数
  115. BootFile string //启动文件
  116. DataUrl string //数据集的obs路径
  117. LogUrl string //日志输出的obs路径
  118. PreVersionId int64 //父版本的版本id
  119. FlavorCode string //modelarts上的规格id
  120. Description string `xorm:"varchar(256)"` //描述
  121. WorkServerNumber int //节点数
  122. FlavorName string //规格名称
  123. EngineName string //引擎名称
  124. TotalVersionCount int //任务的所有版本数量,包括删除的
  125. LabelName string //标签名称
  126. ModelName string //模型名称
  127. ModelVersion string //模型版本
  128. CkptName string //权重文件名称
  129. ResultUrl string //推理结果的obs路径
  130. User *User `xorm:"-"`
  131. Repo *Repository `xorm:"-"`
  132. }
  133. type CloudbrainInfo struct {
  134. Cloudbrain `xorm:"extends"`
  135. User `xorm:"extends"`
  136. }
  // CloudBrainLoginResult holds a code, a message and a free-form payload.
  // The fields carry no JSON tags, so encoding/json uses the Go field names.
  type CloudBrainLoginResult struct {
  	Code string
  	Msg string
  	Payload map[string]interface{}
  }
  // TaskRole describes one task role inside a CreateJobParams request.
  type TaskRole struct {
  	Name string `json:"name"`
  	TaskNumber int `json:"taskNumber"`
  	MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  	MinFailedTaskCount int `json:"minFailedTaskCount"`

  	// Resources requested for the role.
  	CPUNumber int `json:"cpuNumber"`
  	GPUNumber int `json:"gpuNumber"`
  	MemoryMB int `json:"memoryMB"`
  	ShmMB int `json:"shmMB"`

  	Command string `json:"command"`
  	NeedIBDevice bool `json:"needIBDevice"`
  	IsMainRole bool `json:"isMainRole"`
  	UseNNI bool `json:"useNNI"`
  }

  // StHostPath maps a host path to a mount path, optionally read-only.
  type StHostPath struct {
  	Path string `json:"path"`
  	MountPath string `json:"mountPath"`
  	ReadOnly bool `json:"readOnly"`
  }

  // Volume wraps a StHostPath under the "hostPath" JSON key.
  type Volume struct {
  	HostPath StHostPath `json:"hostPath"`
  }

  // CreateJobParams is the JSON body used to create a job: its name, retry
  // count, GPU type, image, task roles and volumes.
  type CreateJobParams struct {
  	JobName string `json:"jobName"`
  	RetryCount int8 `json:"retryCount"`
  	GpuType string `json:"gpuType"`
  	Image string `json:"image"`
  	TaskRoles []TaskRole `json:"taskRoles"`
  	Volumes []Volume `json:"volumes"`
  }
  // CreateJobResult is a code/msg/payload response envelope; the payload is
  // kept as a generic map.
  type CreateJobResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }

  // GetJobResult has the same code/msg/payload shape as CreateJobResult.
  type GetJobResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  182. type GetImagesResult struct {
  183. Code string `json:"code"`
  184. Msg string `json:"msg"`
  185. Payload GetImagesPayload `json:"payload"`
  186. }
  187. type GetImagesPayload struct {
  188. Count int `json:"count"`
  189. TotalPages int `json:"totalPages,omitempty"`
  190. ImageInfo []*ImageInfo `json:"rows"`
  191. }
  192. type CloudbrainsOptions struct {
  193. ListOptions
  194. RepoID int64 // include all repos if empty
  195. UserID int64
  196. JobID string
  197. SortType string
  198. CloudbrainIDs []int64
  199. JobStatus []string
  200. JobStatusNot bool
  201. Keyword string
  202. Type int
  203. JobTypes []string
  204. VersionName string
  205. IsLatestVersion string
  206. JobTypeNot bool
  207. NeedRepoInfo bool
  208. }
  // TaskPod is the decoded status of one task role: the role name plus the
  // status of each of its tasks.
  type TaskPod struct {
  	TaskRoleStatus struct {
  		Name string `json:"name"`
  	} `json:"taskRoleStatus"`
  	TaskStatuses []TaskStatuses `json:"taskStatuses"`
  }

  // TaskStatuses is the status of a single task.
  // StartTime and FinishedTime are not decoded from a tagged JSON key;
  // ConvertToTaskPod derives them from StartAt and FinishedAt.
  type TaskStatuses struct {
  	TaskIndex int `json:"taskIndex"`
  	PodUID string `json:"podUid"`
  	PodIP string `json:"podIp"`
  	PodName string `json:"podName"`
  	ContainerID string `json:"containerId"`
  	ContainerIP string `json:"containerIp"`
  	ContainerGpus string `json:"containerGpus"`
  	State string `json:"state"`
  	StartAt time.Time `json:"startAt"`
  	FinishedAt time.Time `json:"finishedAt"`
  	ExitCode int `json:"exitCode"`
  	ExitDiagnostics string `json:"exitDiagnostics"`
  	RetriedCount int `json:"retriedCount"`
  	StartTime string
  	FinishedTime string
  }

  // TaskInfo describes a task by user name, task/code names, selected
  // benchmark categories, code link and GPU type.
  type TaskInfo struct {
  	Username string `json:"username"`
  	TaskName string `json:"task_name"`
  	CodeName string `json:"code_name"`
  	BenchmarkCategory []string `json:"selected_category"`
  	CodeLink string `json:"code_link"`
  	GpuType string `json:"gpu_type"`
  }

  // ConvertToTaskPod re-encodes a generic JSON map and decodes it into a TaskPod.
  //
  // For the first task status (when there is one) it fills StartTime and
  // FinishedTime with StartAt/FinishedAt shifted by +8 hours from UTC and
  // formatted as "2006-01-02 15:04:05". A zero FinishedAt (task not finished or
  // stopped) is rendered as "-".
  //
  // A marshal error is returned immediately. A decode error is returned
  // together with whatever was decoded. An input without task statuses no
  // longer panics; the TaskPod is returned as decoded.
  func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  	const layout = "2006-01-02 15:04:05"

  	var taskPod TaskPod
  	data, err := json.Marshal(input)
  	if err != nil {
  		return taskPod, err
  	}

  	err = json.Unmarshal(data, &taskPod)
  	if len(taskPod.TaskStatuses) == 0 {
  		// Nothing to format; indexing [0] here used to panic.
  		return taskPod, err
  	}

  	first := &taskPod.TaskStatuses[0]
  	first.StartTime = first.StartAt.UTC().Add(8 * time.Hour).Format(layout)
  	first.FinishedTime = first.FinishedAt.UTC().Add(8 * time.Hour).Format(layout)
  	// An unfinished task has the zero time, which formats as
  	// "0001-01-01 08:00:00"; show "-" instead.
  	if strings.HasPrefix(first.FinishedTime, "0001") {
  		first.FinishedTime = "-"
  	}
  	return taskPod, err
  }
  269. type JobResultPayload struct {
  270. ID string `json:"id"`
  271. Name string `json:"name"`
  272. Platform string `json:"platform"`
  273. JobStatus struct {
  274. Username string `json:"username"`
  275. State string `json:"state"`
  276. SubState string `json:"subState"`
  277. ExecutionType string `json:"executionType"`
  278. Retries int `json:"retries"`
  279. CreatedTime int64 `json:"createdTime"`
  280. CompletedTime int64 `json:"completedTime"`
  281. AppID string `json:"appId"`
  282. AppProgress string `json:"appProgress"`
  283. AppTrackingURL string `json:"appTrackingUrl"`
  284. AppLaunchedTime int64 `json:"appLaunchedTime"`
  285. AppCompletedTime interface{} `json:"appCompletedTime"`
  286. AppExitCode int `json:"appExitCode"`
  287. AppExitDiagnostics string `json:"appExitDiagnostics"`
  288. AppExitType interface{} `json:"appExitType"`
  289. VirtualCluster string `json:"virtualCluster"`
  290. StartTime string
  291. EndTime string
  292. } `json:"jobStatus"`
  293. TaskRoles map[string]interface{} `json:"taskRoles"`
  294. Resource struct {
  295. CPU int `json:"cpu"`
  296. Memory string `json:"memory"`
  297. NvidiaComGpu int `json:"nvidia.com/gpu"`
  298. } `json:"resource"`
  299. Config struct {
  300. Image string `json:"image"`
  301. JobID string `json:"jobId"`
  302. GpuType string `json:"gpuType"`
  303. JobName string `json:"jobName"`
  304. JobType string `json:"jobType"`
  305. TaskRoles []struct {
  306. Name string `json:"name"`
  307. ShmMB int `json:"shmMB"`
  308. Command string `json:"command"`
  309. MemoryMB int `json:"memoryMB"`
  310. CPUNumber int `json:"cpuNumber"`
  311. GpuNumber int `json:"gpuNumber"`
  312. IsMainRole bool `json:"isMainRole"`
  313. TaskNumber int `json:"taskNumber"`
  314. NeedIBDevice bool `json:"needIBDevice"`
  315. MinFailedTaskCount int `json:"minFailedTaskCount"`
  316. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  317. } `json:"taskRoles"`
  318. RetryCount int `json:"retryCount"`
  319. } `json:"config"`
  320. Userinfo struct {
  321. User string `json:"user"`
  322. OrgID string `json:"org_id"`
  323. } `json:"userinfo"`
  324. }
  325. func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  326. data, _ := json.Marshal(input)
  327. var jobResultPayload JobResultPayload
  328. err := json.Unmarshal(data, &jobResultPayload)
  329. jobResultPayload.JobStatus.StartTime = time.Unix(jobResultPayload.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05")
  330. jobResultPayload.JobStatus.EndTime = time.Unix(jobResultPayload.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05")
  331. if jobResultPayload.JobStatus.State == string(JobWaiting) {
  332. jobResultPayload.JobStatus.StartTime = "-"
  333. jobResultPayload.JobStatus.EndTime = "-"
  334. }
  335. return jobResultPayload, err
  336. }
  // ImagesResultPayload is a list of image descriptors.
  // NOTE(review): the list is decoded from the JSON key "taskStatuses", which
  // looks copied from TaskPod — confirm against the remote API before changing.
  type ImagesResultPayload struct {
  	Images []struct {
  		ID int `json:"id"`
  		Name string `json:"name"`
  		Place string `json:"place"`
  		Description string `json:"description"`
  		Provider string `json:"provider"`
  		Createtime string `json:"createtime"`
  		Remark string `json:"remark"`
  	} `json:"taskStatuses"`
  }
  // ImageInfo describes one image. PlaceView has no JSON tag, so it is encoded
  // under its Go field name.
  type ImageInfo struct {
  	ID int `json:"id"`
  	Name string `json:"name"`
  	Place string `json:"place"`
  	Description string `json:"description"`
  	Provider string `json:"provider"`
  	Createtime string `json:"createtime"`
  	Remark string `json:"remark"`
  	IsPublic int `json:"isPublic"`

  	PlaceView string
  }
  // Categories is a list of Category entries under the JSON key "category".
  type Categories struct {
  	Category []*Category `json:"category"`
  }

  // Category is an id/value pair.
  type Category struct {
  	Id int `json:"id"`
  	Value string `json:"value"`
  }
  // BenchmarkTypes is a list of BenchmarkType entries under the JSON key "type".
  type BenchmarkTypes struct {
  	BenchmarkType []*BenchmarkType `json:"type"`
  }

  // BenchmarkType is a first-level algorithm type together with its
  // second-level entries.
  type BenchmarkType struct {
  	Id int `json:"id"`
  	First string `json:"first"` // first-level algorithm type name
  	Second []*BenchmarkDataset `json:"second"`
  }

  // BenchmarkDataset is a second-level algorithm type and the dataset and
  // evaluation-script repository associated with it.
  type BenchmarkDataset struct {
  	Id int `json:"id"`
  	Value string `json:"value"` // second-level algorithm type name
  	Attachment string `json:"attachment"` // uuid of the dataset
  	Owner string `json:"owner"` // owner of the repository containing the evaluation script
  	RepoName string `json:"repo_name"` // name of the repository containing the evaluation script
  }
  // GpuInfos is a list of GpuInfo entries under the JSON key "gpu_type".
  type GpuInfos struct {
  	GpuInfo []*GpuInfo `json:"gpu_type"`
  }

  // GpuInfo is an id/value pair with an associated queue name.
  type GpuInfo struct {
  	Id int `json:"id"`
  	Value string `json:"value"`
  	Queue string `json:"queue"`
  }
  // ResourceSpecs is a list of ResourceSpec entries.
  // NOTE(review): the JSON key is spelled "resorce_specs"; it is part of the
  // wire/config format, so it is kept as is.
  type ResourceSpecs struct {
  	ResourceSpec []*ResourceSpec `json:"resorce_specs"`
  }

  // ResourceSpec is one resource specification: CPU and GPU counts plus memory
  // and shared-memory sizes in MiB.
  type ResourceSpec struct {
  	Id int `json:"id"`
  	CpuNum int `json:"cpu"`
  	GpuNum int `json:"gpu"`
  	MemMiB int `json:"memMiB"`
  	ShareMemMiB int `json:"shareMemMiB"`
  }
  // FlavorInfos is a list of FlavorInfo entries under the JSON key "flavor_info".
  type FlavorInfos struct {
  	FlavorInfo []*FlavorInfo `json:"flavor_info"`
  }

  // FlavorInfo is an id/value pair with a description.
  type FlavorInfo struct {
  	Id int `json:"id"`
  	Value string `json:"value"`
  	Desc string `json:"desc"`
  }
  // PoolInfos is a list of PoolInfo entries under the JSON key "pool_info".
  type PoolInfos struct {
  	PoolInfo []*PoolInfo `json:"pool_info"`
  }

  // PoolInfo identifies a pool by id, name and type.
  type PoolInfo struct {
  	PoolId string `json:"pool_id"`
  	PoolName string `json:"pool_name"`
  	PoolType string `json:"pool_type"`
  }
  // CommitImageParams is the JSON body of a commit-image request: the target
  // IP and task container id plus the tag and description for the new image.
  type CommitImageParams struct {
  	Ip string `json:"ip"`
  	TaskContainerId string `json:"taskContainerId"`
  	ImageTag string `json:"imageTag"`
  	ImageDescription string `json:"imageDescription"`
  }

  // CommitImageResult is a code/msg/payload response envelope with a generic
  // payload map.
  type CommitImageResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  // GetJobLogParams is the JSON body of a job-log query: page size, sort order
  // and a query that matches on the pod name.
  type GetJobLogParams struct {
  	Size string `json:"size"`
  	Sort string `json:"sort"`
  	QueryInfo QueryInfo `json:"query"`
  }

  // QueryInfo wraps the match clause of a job-log query.
  type QueryInfo struct {
  	MatchInfo MatchInfo `json:"match"`
  }

  // MatchInfo matches on the "kubernetes.pod.name" field.
  type MatchInfo struct {
  	PodName string `json:"kubernetes.pod.name"`
  }
  // GetJobLogResult is the response of a job-log query: a scroll id, timing and
  // shard counters, and the matching hits.
  type GetJobLogResult struct {
  	ScrollID string `json:"_scroll_id"`
  	Took int `json:"took"`
  	TimedOut bool `json:"timed_out"`
  	Shards struct {
  		Total int `json:"total"`
  		Successful int `json:"successful"`
  		Skipped int `json:"skipped"`
  		Failed int `json:"failed"`
  	} `json:"_shards"`
  	Hits struct {
  		Hits []Hits `json:"hits"`
  	} `json:"hits"`
  }

  // Hits is a single log hit; the log line is in Source.Message.
  type Hits struct {
  	Index string `json:"_index"`
  	Type string `json:"_type"`
  	ID string `json:"_id"`
  	Source struct {
  		Message string `json:"message"`
  	} `json:"_source"`
  	Sort []int `json:"sort"`
  }
  // GetAllJobLogParams is the JSON body used to continue a scroll: the scroll
  // setting and the scroll id.
  type GetAllJobLogParams struct {
  	Scroll string `json:"scroll"`
  	ScrollID string `json:"scroll_id"`
  }

  // DeleteJobLogTokenParams names the scroll id to delete.
  type DeleteJobLogTokenParams struct {
  	ScrollID string `json:"scroll_id"`
  }

  // DeleteJobLogTokenResult reports whether the delete succeeded and how many
  // were freed.
  type DeleteJobLogTokenResult struct {
  	Succeeded bool `json:"succeeded"`
  	NumFreed int `json:"num_freed"`
  }
  // CloudBrainResult is a minimal response envelope: a code and a message.
  type CloudBrainResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  }
  // CreateNotebook2Params is the JSON body used to create a notebook: name,
  // description, duration, feature, pool, flavor, image, workspace and volume.
  type CreateNotebook2Params struct {
  	JobName string `json:"name"`
  	Description string `json:"description"`
  	Duration int64 `json:"duration"` // in milliseconds
  	Feature string `json:"feature"`
  	PoolID string `json:"pool_id"`
  	Flavor string `json:"flavor"`
  	ImageID string `json:"image_id"`
  	WorkspaceID string `json:"workspace_id"`
  	Volume VolumeReq `json:"volume"`
  }

  // VolumeReq describes the requested volume: capacity, category, ownership
  // and URI.
  type VolumeReq struct {
  	Capacity int `json:"capacity"`
  	Category string `json:"category"`
  	Ownership string `json:"ownership"`
  	Uri string `json:"uri"`
  }
  // CreateNotebookParams is the JSON body used to create a notebook: name,
  // description, profile, flavor, spec, workspace and pool.
  type CreateNotebookParams struct {
  	JobName string `json:"name"`
  	Description string `json:"description"`
  	ProfileID string `json:"profile_id"`
  	Flavor string `json:"flavor"`
  	Spec Spec `json:"spec"`
  	Workspace Workspace `json:"workspace"`
  	Pool Pool `json:"pool"`
  }

  // Pool identifies a pool by id, name and type.
  type Pool struct {
  	ID string `json:"id"`
  	Name string `json:"name"`
  	Type string `json:"type"`
  }

  // Workspace identifies a workspace by id.
  type Workspace struct {
  	ID string `json:"id"`
  }

  // Spec holds the storage and auto-stop settings of a notebook.
  type Spec struct {
  	Storage Storage `json:"storage"`
  	AutoStop AutoStop `json:"auto_stop"`
  }

  // AutoStop holds the auto-stop switch and its duration.
  type AutoStop struct {
  	Enable bool `json:"enable"`
  	Duration int `json:"duration"`
  }

  // Storage holds a storage type and its location.
  type Storage struct {
  	Type string `json:"type"`
  	Location Location `json:"location"`
  }

  // Location is a storage path.
  type Location struct {
  	Path string `json:"path"`
  }
  // NotebookResult carries only the error code and error message of a response.
  type NotebookResult struct {
  	ErrorCode string `json:"error_code"`
  	ErrorMsg string `json:"error_msg"`
  }
  // CreateNotebookResult is the response to a notebook creation: error fields,
  // the notebook's identity, status and timestamps, its profile, and the
  // details of its flavor.
  type CreateNotebookResult struct {
  	ErrorCode string `json:"error_code"`
  	ErrorMsg string `json:"error_msg"`

  	ID string `json:"id"`
  	Name string `json:"name"`
  	Description string `json:"description"`
  	Status string `json:"status"`
  	CreationTimestamp string `json:"creation_timestamp"`
  	LatestUpdateTimestamp string `json:"latest_update_timestamp"`

  	Profile struct {
  		ID string `json:"id"`
  		Name string `json:"name"`
  		Description string `json:"description"`
  		DeType string `json:"de_type"`
  		FlavorType string `json:"flavor_type"`
  	} `json:"profile"`

  	Flavor string `json:"flavor"`
  	FlavorDetails struct {
  		Name string `json:"name"`
  		Status string `json:"status"`
  		QueuingNum int `json:"queuing_num"`
  		QueueLeftTime int `json:"queue_left_time"` // in seconds
  		Duration int `json:"duration"` // auto-stop time, in seconds
  	} `json:"flavor_details"`
  }
  // GetNotebookResult is the response to a notebook query. Besides the fields
  // of a creation result it carries queuing information and the access
  // annotations. CreateTime, LatestUpdateTime, BeginTime and EndTime have no
  // JSON tag; presumably callers fill them from the matching timestamps —
  // verify against the callers.
  type GetNotebookResult struct {
  	ErrorCode string `json:"error_code"`
  	ErrorMsg string `json:"error_msg"`

  	ID string `json:"id"`
  	Name string `json:"name"`
  	Description string `json:"description"`
  	Status string `json:"status"`
  	CreationTimestamp string `json:"creation_timestamp"`
  	CreateTime string
  	LatestUpdateTimestamp string `json:"latest_update_timestamp"`
  	LatestUpdateTime string

  	Profile struct {
  		ID string `json:"id"`
  		Name string `json:"name"`
  		Description string `json:"description"`
  		DeType string `json:"de_type"`
  		FlavorType string `json:"flavor_type"`
  	} `json:"profile"`

  	Flavor string `json:"flavor"`
  	FlavorDetails struct {
  		Name string `json:"name"`
  		Status string `json:"status"`
  		QueuingNum int `json:"queuing_num"`
  		QueueLeftTime int `json:"queue_left_time"` // in seconds
  		Duration int `json:"duration"` // auto-stop time, in seconds
  	} `json:"flavor_details"`

  	QueuingInfo struct {
  		ID string `json:"id"`
  		Name string `json:"name"`
  		Flavor string `json:"flavor"`
  		DeType string `json:"de_type"`
  		Status string `json:"status"`
  		BeginTimestamp int `json:"begin_timestamp"` // time the instance entered the queue
  		BeginTime string
  		RemainTime int `json:"remain_time"` // remaining time of the instance
  		EndTimestamp int `json:"end_timestamp"`
  		EndTime string
  		Rank int `json:"rank"` // rank of the instance in the queue
  	} `json:"queuing_info"`

  	Spec struct {
  		Annotations struct {
  			TargetDomain string `json:"target_domain"`
  			Url string `json:"url"`
  		} `json:"annotations"`
  	} `json:"spec"`
  }
  // GetNotebook2Result is the response to a notebook query: error fields,
  // identity and status, access URL and token, timestamps, image, lease and
  // volume details. CreateTime and LatestUpdateTime have no JSON tag.
  type GetNotebook2Result struct {
  	ErrorCode string `json:"error_code"`
  	ErrorMsg string `json:"error_msg"`
  	FailReason string `json:"fail_reason"`

  	ID string `json:"id"`
  	Name string `json:"name"`
  	Description string `json:"description"`
  	Status string `json:"status"`
  	Url string `json:"url"` // URL for accessing the instance
  	Token string `json:"token"` // token used for notebook authentication
  	Flavor string `json:"flavor"`

  	CreateTime string
  	LatestUpdateTime string
  	CreateAt int64 `json:"create_at"` // instance creation time, UTC milliseconds
  	UpdateAt int64 `json:"update_at"` // time of the last update (keep-alive heartbeats excluded), UTC milliseconds

  	Image struct {
  		Name string `json:"name"`
  		Status string `json:"status"`
  		QueuingNum int `json:"queuing_num"`
  		QueueLeftTime int `json:"queue_left_time"` // in seconds
  		Duration int `json:"duration"` // auto-stop time, in seconds
  	} `json:"image"`

  	// Lease is the countdown information for the automatic stop of the instance.
  	Lease struct {
  		CreateTime int64 `json:"create_at"` // instance creation time, UTC milliseconds
  		// Run duration counted from the creation time. Original note: when
  		// "creation time + duration > current time" the system stops the
  		// instance automatically.
  		Duration int64 `json:"duration"`
  		UpdateTime int64 `json:"update_at"` // time of the last update (keep-alive heartbeats excluded), UTC milliseconds
  	} `json:"lease"`

  	VolumeRes struct {
  		Capacity int `json:"capacity"`
  		Category string `json:"category"`
  		MountPath string `json:"mount_path"`
  		Ownership string `json:"ownership"`
  		Status string `json:"status"`
  	} `json:"volume"`
  }
  634. type GetTokenParams struct {
  635. Auth Auth `json:"auth"`
  636. }
  637. type Auth struct {
  638. Identity Identity `json:"identity"`
  639. Scope Scope `json:"scope"`
  640. }
  641. type Scope struct {
  642. Project Project `json:"project"`
  643. }
  644. type Project struct {
  645. Name string `json:"name"`
  646. }
  647. type Identity struct {
  648. Methods []string `json:"methods"`
  649. Password Password `json:"password"`
  650. }
  651. type Password struct {
  652. User NotebookUser `json:"user"`
  653. }
  654. type NotebookUser struct {
  655. Name string `json:"name"`
  656. Password string `json:"password"`
  657. Domain Domain `json:"domain"`
  658. }
  // Domain identifies a domain by name; serialized as {"name": ...}.
  type Domain struct {
      Name string `json:"name"`
  }
  // Action names usable as the "action" value of a NotebookAction request.
  const (
      // ActionStart is the "start" action.
      ActionStart = "start"
      // ActionStop is the "stop" action.
      ActionStop = "stop"
      // ActionRestart is the "restart" action.
      ActionRestart = "restart"
      // ActionQueue is the "queue" action.
      ActionQueue = "queue"
      // ActionDequeue is the "dequeue" action.
      ActionDequeue = "dequeue"
  )
  // NotebookAction is the JSON body naming the action to apply to a notebook,
  // serialized as {"action": ...}.
  type NotebookAction struct {
      Action string `json:"action"`
  }
  // NotebookActionResult maps the JSON reply to a notebook action: an error
  // code/message pair plus the current, previous and overall status strings.
  type NotebookActionResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_msg"`
      CurrentStatus string `json:"current_status"`
      PreviousState string `json:"previous_state"`
      Status string `json:"status"`
  }
  // NotebookGetJobTokenResult maps the JSON reply carrying a job token, along
  // with an error code/message pair.
  type NotebookGetJobTokenResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_msg"`
      Token string `json:"token"`
  }
  // NotebookDelResult maps the JSON reply to a notebook deletion; it exposes
  // only the "instance_id" key.
  type NotebookDelResult struct {
      InstanceID string `json:"instance_id"`
  }
  687. type CreateTrainJobParams struct {
  688. JobName string `json:"job_name"`
  689. Description string `json:"job_desc"`
  690. Config Config `json:"config"`
  691. WorkspaceID string `json:"workspace_id"`
  692. }
  693. type Config struct {
  694. WorkServerNum int `json:"worker_server_num"`
  695. AppUrl string `json:"app_url"` //训练作业的代码目录
  696. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  697. Parameter []Parameter `json:"parameter"`
  698. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  699. EngineID int64 `json:"engine_id"`
  700. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  701. LogUrl string `json:"log_url"`
  702. //UserImageUrl string `json:"user_image_url"`
  703. //UserCommand string `json:"user_command"`
  704. CreateVersion bool `json:"create_version"`
  705. Flavor Flavor `json:"flavor"`
  706. PoolID string `json:"pool_id"`
  707. }
  708. type CreateInferenceJobParams struct {
  709. JobName string `json:"job_name"`
  710. Description string `json:"job_desc"`
  711. InfConfig InfConfig `json:"config"`
  712. WorkspaceID string `json:"workspace_id"`
  713. }
  714. type InfConfig struct {
  715. WorkServerNum int `json:"worker_server_num"`
  716. AppUrl string `json:"app_url"` //训练作业的代码目录
  717. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  718. Parameter []Parameter `json:"parameter"`
  719. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  720. EngineID int64 `json:"engine_id"`
  721. LogUrl string `json:"log_url"`
  722. CreateVersion bool `json:"create_version"`
  723. Flavor Flavor `json:"flavor"`
  724. PoolID string `json:"pool_id"`
  725. }
  726. type CreateTrainJobVersionParams struct {
  727. Description string `json:"job_desc"`
  728. Config TrainJobVersionConfig `json:"config"`
  729. }
  730. type TrainJobVersionConfig struct {
  731. WorkServerNum int `json:"worker_server_num"`
  732. AppUrl string `json:"app_url"` //训练作业的代码目录
  733. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  734. Parameter []Parameter `json:"parameter"`
  735. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  736. EngineID int64 `json:"engine_id"`
  737. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  738. LogUrl string `json:"log_url"`
  739. Flavor Flavor `json:"flavor"`
  740. PoolID string `json:"pool_id"`
  741. PreVersionId int64 `json:"pre_version_id"`
  742. }
  743. type CreateConfigParams struct {
  744. ConfigName string `json:"config_name"`
  745. Description string `json:"config_desc"`
  746. WorkServerNum int `json:"worker_server_num"`
  747. AppUrl string `json:"app_url"` //训练作业的代码目录
  748. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  749. Parameter []Parameter `json:"parameter"`
  750. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  751. EngineID int64 `json:"engine_id"`
  752. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  753. LogUrl string `json:"log_url"`
  754. Flavor Flavor `json:"flavor"`
  755. PoolID string `json:"pool_id"`
  756. }
  // Parameter is a single label/value pair, serialized with the keys "label"
  // and "value".
  type Parameter struct {
      Label string `json:"label"`
      Value string `json:"value"`
  }
  761. type Parameters struct {
  762. Parameter []Parameter `json:"parameter"`
  763. }
  // DataSource describes a data source by dataset id, dataset version, type
  // and data URL.
  type DataSource struct {
      DatasetID string `json:"dataset_id"`
      DatasetVersion string `json:"dataset_version"`
      Type string `json:"type"`
      DataUrl string `json:"data_url"`
  }
  770. type Volumes struct {
  771. Nfs Nfs `json:"nfs"`
  772. HostPath HostPath `json:"host_path"`
  773. }
  // Nfs describes an NFS mount: an id, source and destination paths and a
  // read-only flag.
  type Nfs struct {
      ID string `json:"id"`
      SourcePath string `json:"src_path"`
      DestPath string `json:"dest_path"`
      ReadOnly bool `json:"read_only"`
  }
  // HostPath describes a host-path mount: source and destination paths and a
  // read-only flag.
  type HostPath struct {
      SourcePath string `json:"src_path"`
      DestPath string `json:"dest_path"`
      ReadOnly bool `json:"read_only"`
  }
  // Flavor identifies a resource flavor by its code; serialized as
  // {"code": ...}.
  type Flavor struct {
      Code string `json:"code"`
  }
  // CreateTrainJobResult maps the JSON reply to a training-job creation
  // request: the error/success envelope plus the identifiers of the job and
  // its version.
  type CreateTrainJobResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_msg"`
      IsSuccess bool `json:"is_success"`
      JobName string `json:"job_name"`
      JobID int64 `json:"job_id"`
      Status int `json:"status"`
      CreateTime int64 `json:"create_time"`
      VersionID int64 `json:"version_id"`
      ResourceID string `json:"resource_id"`
      VersionName string `json:"version_name"`
  }
  // CreateTrainJobConfigResult maps the JSON reply to a configuration creation
  // request; it carries only the error/success envelope.
  type CreateTrainJobConfigResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_msg"`
      IsSuccess bool `json:"is_success"`
  }
  805. type GetResourceSpecsResult struct {
  806. ErrorCode string `json:"error_code"`
  807. ErrorMsg string `json:"error_msg"`
  808. IsSuccess bool `json:"is_success"`
  809. SpecTotalCount int `json:"spec_total_count"`
  810. Specs []Specs `json:"specs"`
  811. }
  // Specs is one entry of GetResourceSpecsResult.Specs. IsNoResource is read
  // from the "no_resource" key.
  type Specs struct {
      Core string `json:"core"`
      Cpu string `json:"cpu"`
      IsNoResource bool `json:"no_resource"`
      GpuType string `json:"gpu_type"`
      SpecID int64 `json:"spec_id"`
      GpuNum int `json:"gpu_num"`
      SpecCode string `json:"spec_code"`
      Storage string `json:"storage"`
      MaxNum int `json:"max_num"`
      UnitNum int `json:"unit_num"`
      InterfaceType int `json:"interface_type"`
  }
  825. type GetConfigListResult struct {
  826. ErrorCode string `json:"error_code"`
  827. ErrorMsg string `json:"error_msg"`
  828. IsSuccess bool `json:"is_success"`
  829. ConfigTotalCount int `json:"config_total_count"`
  830. ParaConfigs []ParaConfig `json:"configs"`
  831. }
  832. type ParaConfig struct {
  833. ConfigName string `json:"config_name"`
  834. ConfigDesc string `json:"config_desc"`
  835. CreateTime int64 `json:"create_time"`
  836. EngineType int `json:"engine_type"`
  837. EngineName string `json:"engine_name"`
  838. EngineId int64 `json:"engine_id"`
  839. EngineVersion string `json:"engine_version"`
  840. UserImageUrl string `json:"user_image_url"`
  841. UserCommand string `json:"user_command"`
  842. Result GetConfigResult
  843. }
  844. type GetConfigResult struct {
  845. ErrorCode string `json:"error_code"`
  846. ErrorMsg string `json:"error_msg"`
  847. IsSuccess bool `json:"is_success"`
  848. ConfigName string `json:"config_name"`
  849. Description string `json:"config_desc"`
  850. WorkServerNum int `json:"worker_server_num"`
  851. AppUrl string `json:"app_url"` //训练作业的代码目录
  852. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  853. Parameter []Parameter `json:"parameter"`
  854. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  855. EngineID int64 `json:"engine_id"`
  856. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  857. LogUrl string `json:"log_url"`
  858. Flavor Flavor `json:"flavor"`
  859. PoolID string `json:"pool_id"`
  860. }
  // ErrorResult maps a JSON error envelope. Unlike the other result types in
  // this file, the message is read from "error_message" rather than
  // "error_msg".
  type ErrorResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_message"`
      IsSuccess bool `json:"is_success"`
  }
  866. type GetTrainJobResult struct {
  867. IsSuccess bool `json:"is_success"`
  868. JobName string `json:"job_name"`
  869. JobID int64 `json:"job_id"`
  870. Description string `json:"job_desc"`
  871. IntStatus int `json:"status"`
  872. Status string
  873. LongCreateTime int64 `json:"create_time"`
  874. CreateTime string
  875. Duration int64 `json:"duration"` //训练作业的运行时间,单位为毫秒
  876. TrainJobDuration string //训练作业的运行时间,格式为hh:mm:ss
  877. VersionID int64 `json:"version_id"`
  878. ResourceID string `json:"resource_id"`
  879. VersionName string `json:"version_name"`
  880. PreVersionID int64 `json:"pre_version_id"`
  881. WorkServerNum int `json:"worker_server_num"`
  882. AppUrl string `json:"app_url"` //训练作业的代码目录
  883. BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
  884. Parameter []Parameter `json:"parameter"`
  885. DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
  886. EngineID int64 `json:"engine_id"`
  887. EngineName string `json:"engine_name"`
  888. EngineVersion string `json:"engine_version"`
  889. TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
  890. LogUrl string `json:"log_url"`
  891. Flavor Flavor `json:"flavor"`
  892. PoolID string `json:"pool_id"`
  893. PoolName string `json:"pool_name"`
  894. NasMountPath string `json:"nas_mount_path"`
  895. NasShareAddr string `json:"nas_share_addr"`
  896. DatasetName string
  897. ModelMetricList string `json:"model_metric_list"` //列表里包含f1_score,recall,precision,accuracy,若有的话
  898. }
  // GetTrainJobLogResult maps the JSON reply carrying a slice of a training
  // job's log: the content, its line count and the start/end line markers.
  type GetTrainJobLogResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_msg"`
      IsSuccess bool `json:"is_success"`
      Content string `json:"content"`
      Lines int `json:"lines"`
      StartLine string `json:"start_line"`
      EndLine string `json:"end_line"`
  }
  // GetTrainJobLogFileNamesResult maps the JSON reply listing a training job's
  // log file names under "log_file_list".
  type GetTrainJobLogFileNamesResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_msg"`
      IsSuccess bool `json:"is_success"`
      LogFileList []string `json:"log_file_list"`
  }
  // TrainJobResult maps a JSON reply that carries only the error/success
  // envelope.
  type TrainJobResult struct {
      ErrorCode string `json:"error_code"`
      ErrorMsg string `json:"error_msg"`
      IsSuccess bool `json:"is_success"`
  }
  // LogFile names a single log file. The field has no json tag, so it is keyed
  // by its Go name "Name".
  type LogFile struct {
      Name string
  }
  922. func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
  923. sess := x.NewSession()
  924. defer sess.Close()
  925. var cond = builder.NewCond()
  926. if opts.RepoID > 0 {
  927. cond = cond.And(
  928. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  929. )
  930. }
  931. if opts.UserID > 0 {
  932. cond = cond.And(
  933. builder.Eq{"cloudbrain.user_id": opts.UserID},
  934. )
  935. }
  936. if (opts.JobID) != "" {
  937. cond = cond.And(
  938. builder.Eq{"cloudbrain.job_id": opts.JobID},
  939. )
  940. }
  941. if (opts.Type) >= 0 {
  942. cond = cond.And(
  943. builder.Eq{"cloudbrain.type": opts.Type},
  944. )
  945. }
  946. if len(opts.JobTypes) > 0 {
  947. if opts.JobTypeNot {
  948. cond = cond.And(
  949. builder.NotIn("cloudbrain.job_type", opts.JobTypes),
  950. )
  951. } else {
  952. cond = cond.And(
  953. builder.In("cloudbrain.job_type", opts.JobTypes),
  954. )
  955. }
  956. }
  957. if (opts.IsLatestVersion) != "" {
  958. cond = cond.And(
  959. builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion},
  960. )
  961. }
  962. if len(opts.CloudbrainIDs) > 0 {
  963. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  964. }
  965. if len(opts.JobStatus) > 0 {
  966. if opts.JobStatusNot {
  967. cond = cond.And(
  968. builder.NotIn("cloudbrain.status", opts.JobStatus),
  969. )
  970. } else {
  971. cond = cond.And(
  972. builder.In("cloudbrain.status", opts.JobStatus),
  973. )
  974. }
  975. }
  976. var count int64
  977. var err error
  978. condition := "cloudbrain.user_id = `user`.id"
  979. if len(opts.Keyword) == 0 {
  980. count, err = sess.Where(cond).Count(new(Cloudbrain))
  981. } else {
  982. lowerKeyWord := strings.ToLower(opts.Keyword)
  983. cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord}))
  984. count, err = sess.Table(&Cloudbrain{}).Where(cond).
  985. Join("left", "`user`", condition).Count(new(CloudbrainInfo))
  986. }
  987. if err != nil {
  988. return nil, 0, fmt.Errorf("Count: %v", err)
  989. }
  990. if opts.Page >= 0 && opts.PageSize > 0 {
  991. var start int
  992. if opts.Page == 0 {
  993. start = 0
  994. } else {
  995. start = (opts.Page - 1) * opts.PageSize
  996. }
  997. sess.Limit(opts.PageSize, start)
  998. }
  999. sess.OrderBy("cloudbrain.created_unix DESC")
  1000. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  1001. if err := sess.Table(&Cloudbrain{}).Where(cond).
  1002. Join("left", "`user`", condition).
  1003. Find(&cloudbrains); err != nil {
  1004. return nil, 0, fmt.Errorf("Find: %v", err)
  1005. }
  1006. if opts.NeedRepoInfo {
  1007. var ids []int64
  1008. for _, task := range cloudbrains {
  1009. ids = append(ids, task.RepoID)
  1010. }
  1011. repositoryMap, err := GetRepositoriesMapByIDs(ids)
  1012. if err == nil {
  1013. for _, task := range cloudbrains {
  1014. task.Repo = repositoryMap[task.RepoID]
  1015. }
  1016. }
  1017. }
  1018. return cloudbrains, count, nil
  1019. }
  1020. func QueryModelTrainJobVersionList(jobId string) ([]*CloudbrainInfo, int, error) {
  1021. sess := x.NewSession()
  1022. defer sess.Close()
  1023. var cond = builder.NewCond()
  1024. cond = cond.And(
  1025. builder.Eq{"cloudbrain.job_id": jobId},
  1026. )
  1027. cond = cond.And(
  1028. builder.Eq{"cloudbrain.Status": "COMPLETED"},
  1029. )
  1030. sess.OrderBy("cloudbrain.created_unix DESC")
  1031. cloudbrains := make([]*CloudbrainInfo, 0)
  1032. if err := sess.Table(&Cloudbrain{}).Where(cond).
  1033. Find(&cloudbrains); err != nil {
  1034. return nil, 0, fmt.Errorf("Find: %v", err)
  1035. }
  1036. return cloudbrains, int(len(cloudbrains)), nil
  1037. }
  1038. func QueryModelTrainJobList(repoId int64) ([]*CloudbrainInfo, int, error) {
  1039. sess := x.NewSession()
  1040. defer sess.Close()
  1041. var cond = builder.NewCond()
  1042. cond = cond.And(
  1043. builder.Eq{"repo_id": repoId},
  1044. )
  1045. cond = cond.And(
  1046. builder.Eq{"Status": "COMPLETED"},
  1047. )
  1048. cond = cond.And(
  1049. builder.Eq{"job_type": "TRAIN"},
  1050. )
  1051. cloudbrains := make([]*CloudbrainInfo, 0)
  1052. if err := sess.Select("job_id,job_name").Table(&Cloudbrain{}).Where(cond).OrderBy("created_unix DESC").
  1053. Find(&cloudbrains); err != nil {
  1054. return nil, 0, fmt.Errorf("Find: %v", err)
  1055. }
  1056. keys := make(map[string]string)
  1057. uniqueElements := make([]*CloudbrainInfo, 0)
  1058. for _, entry := range cloudbrains {
  1059. if _, value := keys[entry.JobID]; !value {
  1060. keys[entry.JobID] = entry.JobName
  1061. uniqueElements = append(uniqueElements, entry)
  1062. }
  1063. }
  1064. return uniqueElements, int(len(uniqueElements)), nil
  1065. }
  1066. func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, error) {
  1067. sess := x.NewSession()
  1068. defer sess.Close()
  1069. var cond = builder.NewCond()
  1070. if opts.RepoID > 0 {
  1071. cond = cond.And(
  1072. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  1073. )
  1074. }
  1075. if opts.UserID > 0 {
  1076. cond = cond.And(
  1077. builder.Eq{"cloudbrain.user_id": opts.UserID},
  1078. )
  1079. }
  1080. if (opts.Type) >= 0 {
  1081. cond = cond.And(
  1082. builder.Eq{"cloudbrain.type": opts.Type},
  1083. )
  1084. }
  1085. if (opts.JobID) != "" {
  1086. cond = cond.And(
  1087. builder.Eq{"cloudbrain.job_id": opts.JobID},
  1088. )
  1089. }
  1090. if len(opts.JobTypes) > 0 {
  1091. cond = cond.And(
  1092. builder.In("cloudbrain.job_type", opts.JobTypes),
  1093. )
  1094. }
  1095. if len(opts.CloudbrainIDs) > 0 {
  1096. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  1097. }
  1098. count, err := sess.Where(cond).Count(new(Cloudbrain))
  1099. if err != nil {
  1100. return nil, 0, fmt.Errorf("Count: %v", err)
  1101. }
  1102. if opts.Page >= 0 && opts.PageSize > 0 {
  1103. var start int
  1104. if opts.Page == 0 {
  1105. start = 0
  1106. } else {
  1107. start = (opts.Page - 1) * opts.PageSize
  1108. }
  1109. sess.Limit(opts.PageSize, start)
  1110. }
  1111. sess.OrderBy("cloudbrain.created_unix DESC")
  1112. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  1113. if err := sess.Table(&Cloudbrain{}).Where(cond).
  1114. Join("left", "`user`", "cloudbrain.user_id = `user`.id").
  1115. Find(&cloudbrains); err != nil {
  1116. return nil, 0, fmt.Errorf("Find: %v", err)
  1117. }
  1118. return cloudbrains, int(count), nil
  1119. }
  1120. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  1121. if _, err = x.Insert(cloudbrain); err != nil {
  1122. return err
  1123. }
  1124. return nil
  1125. }
  1126. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  1127. has, err := x.Get(cb)
  1128. if err != nil {
  1129. return nil, err
  1130. } else if !has {
  1131. return nil, ErrJobNotExist{}
  1132. }
  1133. return cb, nil
  1134. }
  1135. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  1136. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  1137. return getRepoCloudBrain(cb)
  1138. }
  1139. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  1140. cb := &Cloudbrain{JobID: jobID}
  1141. return getRepoCloudBrain(cb)
  1142. }
  1143. func GetCloudbrainByJobIDAndVersionName(jobID string, versionName string) (*Cloudbrain, error) {
  1144. cb := &Cloudbrain{JobID: jobID, VersionName: versionName}
  1145. return getRepoCloudBrain(cb)
  1146. }
  1147. func GetCloudbrainByJobIDAndIsLatestVersion(jobID string, isLatestVersion string) (*Cloudbrain, error) {
  1148. cb := &Cloudbrain{JobID: jobID, IsLatestVersion: isLatestVersion}
  1149. return getRepoCloudBrain(cb)
  1150. }
  1151. func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) {
  1152. cloudBrains := make([]*Cloudbrain, 0)
  1153. err := x.Cols("job_id", "status", "type", "job_type", "version_id").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains)
  1154. return cloudBrains, err
  1155. }
  1156. func GetCloudbrainsNeededStopByRepoID(repoID int64) ([]*Cloudbrain, error) {
  1157. cloudBrains := make([]*Cloudbrain, 0)
  1158. err := x.Cols("job_id", "status", "type", "job_type", "version_id").Where("repo_id=? AND status !=?", repoID, string(JobStopped)).Find(&cloudBrains)
  1159. return cloudBrains, err
  1160. }
  1161. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  1162. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  1163. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  1164. return
  1165. }
  1166. func SetTrainJobStatusByJobID(jobID string, status string, duration int64, trainjobduration string) (err error) {
  1167. cb := &Cloudbrain{JobID: jobID, Status: string(status), Duration: duration, TrainJobDuration: trainjobduration}
  1168. _, err = x.Cols("status", "duration", "train_job_duration").Where("cloudbrain.job_id=?", jobID).Update(cb)
  1169. return
  1170. }
  1171. func SetVersionCountAndLatestVersion(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) {
  1172. cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount}
  1173. _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb)
  1174. return
  1175. }
  1176. func UpdateJob(job *Cloudbrain) error {
  1177. return updateJob(x, job)
  1178. }
  1179. func updateJob(e Engine, job *Cloudbrain) error {
  1180. _, err := e.ID(job.ID).AllCols().Update(job)
  1181. return err
  1182. }
  1183. func UpdateTrainJobVersion(job *Cloudbrain) error {
  1184. return updateJobTrainVersion(x, job)
  1185. }
  1186. func updateJobTrainVersion(e Engine, job *Cloudbrain) error {
  1187. var sess *xorm.Session
  1188. sess = e.Where("job_id = ? AND version_name=?", job.JobID, job.VersionName)
  1189. _, err := sess.Cols("status", "train_job_duration").Update(job)
  1190. return err
  1191. }
  1192. func DeleteJob(job *Cloudbrain) error {
  1193. return deleteJob(x, job)
  1194. }
  1195. func deleteJob(e Engine, job *Cloudbrain) error {
  1196. _, err := e.ID(job.ID).Delete(job)
  1197. return err
  1198. }
  1199. func GetCloudbrainByName(jobName string) (*Cloudbrain, error) {
  1200. cb := &Cloudbrain{JobName: jobName}
  1201. return getRepoCloudBrain(cb)
  1202. }
  1203. func CanDelJob(isSigned bool, user *User, job *CloudbrainInfo) bool {
  1204. if !isSigned || (job.Status != string(JobStopped) && job.Status != string(JobFailed) && job.Status != string(ModelArtsStartFailed) && job.Status != string(ModelArtsCreateFailed)) {
  1205. return false
  1206. }
  1207. repo, err := GetRepositoryByID(job.RepoID)
  1208. if err != nil {
  1209. log.Error("GetRepositoryByID failed:%v", err.Error())
  1210. return false
  1211. }
  1212. permission, _ := GetUserRepoPermission(repo, user)
  1213. if err != nil {
  1214. log.Error("GetUserRepoPermission failed:%v", err.Error())
  1215. return false
  1216. }
  1217. if (user.ID == job.UserID && permission.AccessMode >= AccessModeWrite) || user.IsAdmin || permission.AccessMode >= AccessModeAdmin {
  1218. return true
  1219. }
  1220. return false
  1221. }
  1222. func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) {
  1223. cloudbrains := make([]*Cloudbrain, 0, 10)
  1224. return cloudbrains, x.
  1225. NotIn("status",
  1226. JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted,
  1227. ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed,
  1228. ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed).
  1229. Limit(100).
  1230. Find(&cloudbrains)
  1231. }
  1232. func GetCloudbrainCountByUserID(userID int64, jobType string) (int, error) {
  1233. count, err := x.In("status", JobWaiting, JobRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeCloudBrainOne).Count(new(Cloudbrain))
  1234. return int(count), err
  1235. }
  1236. func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) {
  1237. count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting).
  1238. And("job_type = ? and user_id = ? and type = ?", JobTypeDebug, userID, TypeCloudBrainTwo).Count(new(Cloudbrain))
  1239. return int(count), err
  1240. }
  1241. func GetCloudbrainTrainJobCountByUserID(userID int64) (int, error) {
  1242. count, err := x.In("status", ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted).
  1243. And("job_type = ? and user_id = ? and type = ?", JobTypeTrain, userID, TypeCloudBrainTwo).Count(new(Cloudbrain))
  1244. return int(count), err
  1245. }
  1246. func GetCloudbrainInferenceJobCountByUserID(userID int64) (int, error) {
  1247. count, err := x.In("status", ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted).
  1248. And("job_type = ? and user_id = ? and type = ?", JobTypeInference, userID, TypeCloudBrainTwo).Count(new(Cloudbrain))
  1249. return int(count), err
  1250. }
  1251. func UpdateInferenceJob(job *Cloudbrain) error {
  1252. return updateInferenceJob(x, job)
  1253. }
  1254. func updateInferenceJob(e Engine, job *Cloudbrain) error {
  1255. var sess *xorm.Session
  1256. sess = e.Where("job_id = ?", job.JobID)
  1257. _, err := sess.Cols("status", "train_job_duration").Update(job)
  1258. return err
  1259. }
  1260. func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) {
  1261. sess := x.NewSession()
  1262. defer sess.Close()
  1263. if err = sess.Begin(); err != nil {
  1264. return err
  1265. }
  1266. if _, err = sess.Delete(old); err != nil {
  1267. sess.Rollback()
  1268. return err
  1269. }
  1270. if _, err = sess.Insert(new); err != nil {
  1271. sess.Rollback()
  1272. return err
  1273. }
  1274. if err = sess.Commit(); err != nil {
  1275. return err
  1276. }
  1277. return nil
  1278. }