You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 32 kB

3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
5 years ago
5 years ago
4 years ago
3 years ago
5 years ago
4 years ago
5 years ago
3 years ago
3 years ago
3 years ago
5 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006
  1. package models
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "time"
  7. "xorm.io/builder"
  8. "xorm.io/xorm"
  9. "code.gitea.io/gitea/modules/log"
  10. "code.gitea.io/gitea/modules/setting"
  11. "code.gitea.io/gitea/modules/timeutil"
  12. )
  // CloudbrainStatus is a string-typed job state. Its values in this file are
  // the Job* constants (WAITING, STOPPED, SUCCEEDED, FAILED, RUNNING).
  13. type CloudbrainStatus string
  // JobType is a string-typed job category. Its values in this file are the
  // JobType* constants (DEBUG, BENCHMARK, SNN4IMAGENET, BRAINSCORE, TRAIN).
  14. type JobType string
  // ModelArtsJobStatus is a string-typed status. Its values in this file are
  // the ModelArts* constants (CREATE_QUEUING, CREATING, RUNNING, ...).
  15. type ModelArtsJobStatus string
  // String values for the three enum-like types declared above: job states
  // (CloudbrainStatus), job categories (JobType) and ModelArts statuses
  // (ModelArtsJobStatus).
  16. const (
  // CloudbrainStatus values.
  17. JobWaiting CloudbrainStatus = "WAITING"
  18. JobStopped CloudbrainStatus = "STOPPED"
  19. JobSucceeded CloudbrainStatus = "SUCCEEDED"
  20. JobFailed CloudbrainStatus = "FAILED"
  21. JobRunning CloudbrainStatus = "RUNNING"
  // JobType values. JobTypeDebug matches the column default declared on
  // Cloudbrain.JobType.
  22. JobTypeDebug JobType = "DEBUG"
  23. JobTypeBenchmark JobType = "BENCHMARK"
  24. JobTypeSnn4imagenet JobType = "SNN4IMAGENET"
  25. JobTypeBrainScore JobType = "BRAINSCORE"
  26. JobTypeTrain JobType = "TRAIN"
  // ModelArtsJobStatus values.
  27. ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" // queuing for creation on free resources
  28. ModelArtsCreating ModelArtsJobStatus = "CREATING" // being created
  29. ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED" // creation failed
  30. ModelArtsStartQueuing ModelArtsJobStatus = "START_QUEUING" // queuing for start on free resources
  31. ModelArtsReadyToStart ModelArtsJobStatus = "READY_TO_START" // free resource, waiting to be started
  32. ModelArtsStarting ModelArtsJobStatus = "STARTING" // starting
  33. ModelArtsRestarting ModelArtsJobStatus = "RESTARTING" // restarting
  34. ModelArtsStartFailed ModelArtsJobStatus = "START_FAILED" // start failed
  35. ModelArtsRunning ModelArtsJobStatus = "RUNNING" // running
  36. ModelArtsStopping ModelArtsJobStatus = "STOPPING" // stopping
  37. ModelArtsStopped ModelArtsJobStatus = "STOPPED" // stopped
  38. ModelArtsUnavailable ModelArtsJobStatus = "UNAVAILABLE" // faulty
  39. ModelArtsDeleted ModelArtsJobStatus = "DELETED" // deleted
  40. ModelArtsResizing ModelArtsJobStatus = "RESIZING" // flavor change in progress
  // NOTE(review): the identifier is spelled "Resiz" (no trailing "e"); renaming
  // would touch callers outside this file.
  41. ModelArtsResizFailed ModelArtsJobStatus = "RESIZE_FAILED" // flavor change failed
  42. )
  // Cloudbrain is the xorm-mapped record of one job. Most columns are indexed.
  // Fields tagged `xorm:"-"` (CanDebug, CanDel, User, Repo) are not mapped to
  // columns and have to be filled in by code.
  43. type Cloudbrain struct {
  44. ID int64 `xorm:"pk autoincr"`
  // JobID is stored as a string here; note that CloudbrainsOptions.JobID is an int64.
  45. JobID string `xorm:"INDEX NOT NULL"`
  // JobType defaults to 'DEBUG' at the column level (same text as JobTypeDebug).
  46. JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
  47. JobName string `xorm:"INDEX"`
  48. Status string `xorm:"INDEX"`
  49. UserID int64 `xorm:"INDEX"`
  50. RepoID int64 `xorm:"INDEX"`
  51. SubTaskName string `xorm:"INDEX"`
  52. ContainerID string
  53. ContainerIp string
  // CreatedUnix / UpdatedUnix use the xorm created/updated tags.
  54. CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
  55. UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  // NOTE(review): unit of Duration is not visible in this file — confirm.
  56. Duration int64 `xorm:"INDEX duration"`
  57. TrainJobDuration string
  // DeletedAt carries the xorm "deleted" tag (soft-delete marker).
  58. DeletedAt time.Time `xorm:"deleted"`
  // Not persisted.
  59. CanDebug bool `xorm:"-"`
  60. CanDel bool `xorm:"-"`
  // NOTE(review): meaning of the Type values is not visible here; presumably
  // it distinguishes the backend platform — confirm.
  61. Type int `xorm:"INDEX DEFAULT 0"`
  62. VersionID int64 `xorm:"INDEX DEFAULT 0"`
  63. VersionName string
  64. Uuid string
  65. DatasetName string
  // Not persisted.
  66. User *User `xorm:"-"`
  67. Repo *Repository `xorm:"-"`
  68. }
  // CloudbrainInfo embeds both Cloudbrain and User with the xorm "extends" tag,
  // so one value carries the columns of both structs. It is the element type
  // returned by Cloudbrains; presumably the row of a cloudbrain/user join —
  // confirm against the query.
  69. type CloudbrainInfo struct {
  70. Cloudbrain `xorm:"extends"`
  71. User `xorm:"extends"`
  72. }
  // CloudBrainLoginResult holds a code, a message and a free-form payload map.
  // It has no JSON tags, so encoding/json matches keys against the field names
  // case-insensitively. Presumably the response of a login call — confirm.
  73. type CloudBrainLoginResult struct {
  74. Code string
  75. Msg string
  76. Payload map[string]interface{}
  77. }
  // TaskRole is the JSON description of one task role: its name, task count,
  // success/failure thresholds, CPU/GPU/memory/shared-memory amounts, command
  // and a few boolean switches. It is the element type of
  // CreateJobParams.TaskRoles.
  78. type TaskRole struct {
  79. Name string `json:"name"`
  80. TaskNumber int `json:"taskNumber"`
  81. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  82. MinFailedTaskCount int `json:"minFailedTaskCount"`
  83. CPUNumber int `json:"cpuNumber"`
  84. GPUNumber int `json:"gpuNumber"`
  85. MemoryMB int `json:"memoryMB"`
  86. ShmMB int `json:"shmMB"`
  87. Command string `json:"command"`
  88. NeedIBDevice bool `json:"needIBDevice"`
  89. IsMainRole bool `json:"isMainRole"`
  90. UseNNI bool `json:"useNNI"`
  91. }
  // StHostPath is the JSON description of a host-path mount: source path,
  // mount path and a read-only flag. It is the payload of Volume.HostPath.
  92. type StHostPath struct {
  93. Path string `json:"path"`
  94. MountPath string `json:"mountPath"`
  95. ReadOnly bool `json:"readOnly"`
  96. }
  // Volume wraps a single StHostPath under the JSON key "hostPath". It is the
  // element type of CreateJobParams.Volumes.
  97. type Volume struct {
  98. HostPath StHostPath `json:"hostPath"`
  99. }
  // CreateJobParams is the JSON description of a job: name, retry count, GPU
  // type, image, task roles and volumes. Presumably the request body of the
  // job-creation call — confirm against the caller.
  100. type CreateJobParams struct {
  101. JobName string `json:"jobName"`
  102. RetryCount int8 `json:"retryCount"`
  103. GpuType string `json:"gpuType"`
  104. Image string `json:"image"`
  105. TaskRoles []TaskRole `json:"taskRoles"`
  106. Volumes []Volume `json:"volumes"`
  107. }
  // CreateJobResult is a JSON envelope of code, msg and a free-form payload
  // object. Presumably the response to CreateJobParams — confirm.
  108. type CreateJobResult struct {
  109. Code string `json:"code"`
  110. Msg string `json:"msg"`
  111. Payload map[string]interface{} `json:"payload"`
  112. }
  // GetJobResult is a JSON envelope of code, msg and a free-form payload
  // object. ConvertToJobResultPayload accepts a map of this shape; presumably
  // it is fed from Payload — confirm against the caller.
  113. type GetJobResult struct {
  114. Code string `json:"code"`
  115. Msg string `json:"msg"`
  116. Payload map[string]interface{} `json:"payload"`
  117. }
  // GetImagesResult is a JSON envelope of code and msg whose payload is a
  // typed GetImagesPayload.
  118. type GetImagesResult struct {
  119. Code string `json:"code"`
  120. Msg string `json:"msg"`
  121. Payload GetImagesPayload `json:"payload"`
  122. }
  // GetImagesPayload is one page of images: a count, an optional total page
  // count, and the image entries, which are read from the JSON key "rows".
  123. type GetImagesPayload struct {
  124. Count int `json:"count"`
  125. TotalPages int `json:"totalPages,omitempty"`
  126. ImageInfo []*ImageInfo `json:"rows"`
  127. }
  // CloudbrainsOptions carries the filters read by Cloudbrains. Each filter is
  // applied only when its guard holds: RepoID, UserID and JobID when > 0,
  // Type when >= 0, JobType when non-empty, CloudbrainIDs when non-empty.
  128. type CloudbrainsOptions struct {
  129. ListOptions
  130. RepoID int64 // include all repos if empty
  131. UserID int64
  // NOTE(review): compared against the string column cloudbrain.job_id.
  132. JobID int64
  133. SortType string
  134. CloudbrainIDs []int64
  135. // JobStatus CloudbrainStatus
  // Type is filtered on whenever it is >= 0, so the zero value restricts the
  // result to type 0; pass a negative value to skip this filter.
  136. Type int
  137. JobType string
  138. }
  // TaskPod is the decoded form of one task role: the role's name and the
  // status of every task reported for it.
  type TaskPod struct {
  	TaskRoleStatus struct {
  		Name string `json:"name"`
  	} `json:"taskRoleStatus"`
  	TaskStatuses []TaskStatuses `json:"taskStatuses"`
  }

  // TaskStatuses describes a single task: where it runs (pod and container
  // identifiers and addresses), its state, start/finish instants and exit
  // information.
  type TaskStatuses struct {
  	TaskIndex       int       `json:"taskIndex"`
  	PodUID          string    `json:"podUid"`
  	PodIP           string    `json:"podIp"`
  	PodName         string    `json:"podName"`
  	ContainerID     string    `json:"containerId"`
  	ContainerIP     string    `json:"containerIp"`
  	ContainerGpus   string    `json:"containerGpus"`
  	State           string    `json:"state"`
  	StartAt         time.Time `json:"startAt"`
  	FinishedAt      time.Time `json:"finishedAt"`
  	ExitCode        int       `json:"exitCode"`
  	ExitDiagnostics string    `json:"exitDiagnostics"`
  	RetriedCount    int       `json:"retriedCount"`
  	// StartTime and FinishedTime carry no JSON tag. ConvertToTaskPod
  	// overwrites them on the first entry with display strings derived from
  	// StartAt and FinishedAt.
  	StartTime    string
  	FinishedTime string
  }

  // TaskInfo is the JSON description of a task: user name, task and code
  // names, selected categories, code link and GPU type.
  type TaskInfo struct {
  	Username          string   `json:"username"`
  	TaskName          string   `json:"task_name"`
  	CodeName          string   `json:"code_name"`
  	BenchmarkCategory []string `json:"selected_category"`
  	CodeLink          string   `json:"code_link"`
  	GpuType           string   `json:"gpu_type"`
  }

  // ConvertToTaskPod converts a generic JSON object into a TaskPod by
  // round-tripping it through encoding/json, then fills StartTime and
  // FinishedTime of the first task status.
  //
  // Both strings use the layout "2006-01-02 15:04:05" and are shifted by a
  // fixed +8 hours from UTC. A FinishedTime that falls in year 0001 (the zero
  // time.Time, i.e. the task has not finished or been stopped) is replaced
  // with "-".
  //
  // Errors:
  //   - input cannot be encoded as JSON: a zero TaskPod and the wrapped error.
  //   - decoding reports an error: encoding/json decodes on a best-effort
  //     basis, so whatever was decoded is still returned together with the
  //     error.
  //   - no task status present: the decoded TaskPod and an error, instead of
  //     the index-out-of-range panic this used to cause.
  func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  	const displayLayout = "2006-01-02 15:04:05"

  	// display renders t in the fixed UTC+8 presentation used for task times.
  	display := func(t time.Time) string {
  		return t.UTC().Add(8 * time.Hour).Format(displayLayout)
  	}

  	data, err := json.Marshal(input)
  	if err != nil {
  		return TaskPod{}, fmt.Errorf("encoding task pod payload: %w", err)
  	}

  	var taskPod TaskPod
  	// Keep going after a decode error: callers have always received the
  	// best-effort result alongside it.
  	err = json.Unmarshal(data, &taskPod)

  	if len(taskPod.TaskStatuses) == 0 {
  		if err == nil {
  			err = fmt.Errorf("task pod payload contains no task statuses")
  		}
  		return taskPod, err
  	}

  	first := &taskPod.TaskStatuses[0]
  	first.StartTime = display(first.StartAt)
  	first.FinishedTime = display(first.FinishedAt)
  	if strings.HasPrefix(first.FinishedTime, "0001") {
  		first.FinishedTime = "-"
  	}
  	return taskPod, err
  }
  199. type JobResultPayload struct {
  200. ID string `json:"id"`
  201. Name string `json:"name"`
  202. Platform string `json:"platform"`
  203. JobStatus struct {
  204. Username string `json:"username"`
  205. State string `json:"state"`
  206. SubState string `json:"subState"`
  207. ExecutionType string `json:"executionType"`
  208. Retries int `json:"retries"`
  209. CreatedTime int64 `json:"createdTime"`
  210. CompletedTime int64 `json:"completedTime"`
  211. AppID string `json:"appId"`
  212. AppProgress string `json:"appProgress"`
  213. AppTrackingURL string `json:"appTrackingUrl"`
  214. AppLaunchedTime int64 `json:"appLaunchedTime"`
  215. AppCompletedTime interface{} `json:"appCompletedTime"`
  216. AppExitCode int `json:"appExitCode"`
  217. AppExitDiagnostics string `json:"appExitDiagnostics"`
  218. AppExitType interface{} `json:"appExitType"`
  219. VirtualCluster string `json:"virtualCluster"`
  220. StartTime string
  221. EndTime string
  222. } `json:"jobStatus"`
  223. TaskRoles map[string]interface{} `json:"taskRoles"`
  224. Resource struct {
  225. CPU int `json:"cpu"`
  226. Memory string `json:"memory"`
  227. NvidiaComGpu int `json:"nvidia.com/gpu"`
  228. } `json:"resource"`
  229. Config struct {
  230. Image string `json:"image"`
  231. JobID string `json:"jobId"`
  232. GpuType string `json:"gpuType"`
  233. JobName string `json:"jobName"`
  234. JobType string `json:"jobType"`
  235. TaskRoles []struct {
  236. Name string `json:"name"`
  237. ShmMB int `json:"shmMB"`
  238. Command string `json:"command"`
  239. MemoryMB int `json:"memoryMB"`
  240. CPUNumber int `json:"cpuNumber"`
  241. GpuNumber int `json:"gpuNumber"`
  242. IsMainRole bool `json:"isMainRole"`
  243. TaskNumber int `json:"taskNumber"`
  244. NeedIBDevice bool `json:"needIBDevice"`
  245. MinFailedTaskCount int `json:"minFailedTaskCount"`
  246. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  247. } `json:"taskRoles"`
  248. RetryCount int `json:"retryCount"`
  249. } `json:"config"`
  250. Userinfo struct {
  251. User string `json:"user"`
  252. OrgID string `json:"org_id"`
  253. } `json:"userinfo"`
  254. }
  255. func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  256. data, _ := json.Marshal(input)
  257. var jobResultPayload JobResultPayload
  258. err := json.Unmarshal(data, &jobResultPayload)
  259. jobResultPayload.JobStatus.StartTime = time.Unix(jobResultPayload.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05")
  260. jobResultPayload.JobStatus.EndTime = time.Unix(jobResultPayload.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05")
  261. if jobResultPayload.JobStatus.State == string(JobWaiting) {
  262. jobResultPayload.JobStatus.StartTime = "-"
  263. jobResultPayload.JobStatus.EndTime = "-"
  264. }
  265. return jobResultPayload, err
  266. }
  // ImagesResultPayload holds a list of image descriptions (id, name, place,
  // description, provider, creation time, remark).
  // NOTE(review): the Images slice is read from the JSON key "taskStatuses",
  // which looks copied from TaskPod — confirm the intended key before relying
  // on this type.
  267. type ImagesResultPayload struct {
  268. Images []struct {
  269. ID int `json:"id"`
  270. Name string `json:"name"`
  271. Place string `json:"place"`
  272. Description string `json:"description"`
  273. Provider string `json:"provider"`
  274. Createtime string `json:"createtime"`
  275. Remark string `json:"remark"`
  276. } `json:"taskStatuses"`
  277. }
  // ImageInfo is the JSON description of one image and the element type of
  // GetImagesPayload.ImageInfo.
  278. type ImageInfo struct {
  279. ID int `json:"id"`
  280. Name string `json:"name"`
  281. Place string `json:"place"`
  282. Description string `json:"description"`
  283. Provider string `json:"provider"`
  284. Createtime string `json:"createtime"`
  285. Remark string `json:"remark"`
  286. IsPublic int `json:"isPublic"`
  // PlaceView has no JSON tag; presumably a display form of Place filled in
  // by callers — confirm.
  287. PlaceView string
  288. }
  // Categories wraps a list of Category entries under the JSON key "category".
  289. type Categories struct {
  290. Category []*Category `json:"category"`
  291. }
  // Category is an id/value pair.
  292. type Category struct {
  293. Id int `json:"id"`
  294. Value string `json:"value"`
  295. }
  // GpuInfos wraps a list of GpuInfo entries under the JSON key "gpu_type".
  296. type GpuInfos struct {
  297. GpuInfo []*GpuInfo `json:"gpu_type"`
  298. }
  // GpuInfo is an id/value pair with an additional queue name.
  299. type GpuInfo struct {
  300. Id int `json:"id"`
  301. Value string `json:"value"`
  302. Queue string `json:"queue"`
  303. }
  // ResourceSpecs wraps a list of ResourceSpec entries.
  // NOTE(review): the JSON key is spelled "resorce_specs" (missing "u"). It is
  // a runtime key, so it must stay in sync with whatever produces the JSON —
  // confirm before correcting the spelling.
  304. type ResourceSpecs struct {
  305. ResourceSpec []*ResourceSpec `json:"resorce_specs"`
  306. }
  // ResourceSpec is one resource specification: an id plus CPU count, GPU
  // count, memory and shared memory (both in MiB, per the field names).
  307. type ResourceSpec struct {
  308. Id int `json:"id"`
  309. CpuNum int `json:"cpu"`
  310. GpuNum int `json:"gpu"`
  311. MemMiB int `json:"memMiB"`
  312. ShareMemMiB int `json:"shareMemMiB"`
  313. }
  // FlavorInfos wraps a list of FlavorInfo entries under the JSON key
  // "flavor_info".
  314. type FlavorInfos struct {
  315. FlavorInfo []*FlavorInfo `json:"flavor_info"`
  316. }
  // FlavorInfo is an id/value pair.
  317. type FlavorInfo struct {
  318. Id int `json:"id"`
  319. Value string `json:"value"`
  320. }
  // PoolInfos wraps a list of PoolInfo entries under the JSON key "pool_info".
  321. type PoolInfos struct {
  322. PoolInfo []*PoolInfo `json:"pool_info"`
  323. }
  // PoolInfo describes one pool by id, name and type.
  324. type PoolInfo struct {
  325. PoolId string `json:"pool_id"`
  326. PoolName string `json:"pool_name"`
  327. PoolType string `json:"pool_type"`
  328. }
  // CommitImageParams is the JSON description of an image commit: an IP, the
  // task container id, and the tag and description for the image. Presumably
  // the request body of an image-commit call — confirm.
  329. type CommitImageParams struct {
  330. Ip string `json:"ip"`
  331. TaskContainerId string `json:"taskContainerId"`
  332. ImageTag string `json:"imageTag"`
  333. ImageDescription string `json:"imageDescription"`
  334. }
  // CommitImageResult is a JSON envelope of code, msg and a free-form payload
  // object. Presumably the response to CommitImageParams — confirm.
  335. type CommitImageResult struct {
  336. Code string `json:"code"`
  337. Msg string `json:"msg"`
  338. Payload map[string]interface{} `json:"payload"`
  339. }
  // CloudBrainResult is the minimal JSON envelope: code and msg, no payload.
  340. type CloudBrainResult struct {
  341. Code string `json:"code"`
  342. Msg string `json:"msg"`
  343. }
  // CreateNotebookParams is the JSON description of a notebook: name,
  // description, profile id, flavor, spec (storage and auto-stop), workspace
  // and pool. Note that JobName is serialized under the key "name".
  // Presumably the request body of a notebook-creation call — confirm.
  344. type CreateNotebookParams struct {
  345. JobName string `json:"name"`
  346. Description string `json:"description"`
  347. ProfileID string `json:"profile_id"`
  348. Flavor string `json:"flavor"`
  349. Spec Spec `json:"spec"`
  350. Workspace Workspace `json:"workspace"`
  351. Pool Pool `json:"pool"`
  352. }
  // Pool identifies a pool by id, name and type; used by
  // CreateNotebookParams.Pool.
  353. type Pool struct {
  354. ID string `json:"id"`
  355. Name string `json:"name"`
  356. Type string `json:"type"`
  357. }
  // Workspace identifies a workspace by id; used by
  // CreateNotebookParams.Workspace.
  358. type Workspace struct {
  359. ID string `json:"id"`
  360. }
  // Spec groups the storage and auto-stop settings of
  // CreateNotebookParams.Spec.
  361. type Spec struct {
  362. Storage Storage `json:"storage"`
  363. AutoStop AutoStop `json:"auto_stop"`
  364. }
  // AutoStop is an on/off switch plus a duration.
  // NOTE(review): the unit of Duration is not stated here; the notebook result
  // types annotate their auto-stop duration as seconds — confirm it matches.
  365. type AutoStop struct {
  366. Enable bool `json:"enable"`
  367. Duration int `json:"duration"`
  368. }
  // Storage is a storage type plus its location; used by Spec.Storage.
  369. type Storage struct {
  370. Type string `json:"type"`
  371. Location Location `json:"location"`
  372. }
  // Location is a single path; used by Storage.Location.
  373. type Location struct {
  374. Path string `json:"path"`
  375. }
  // NotebookResult is the minimal notebook JSON envelope: error_code and
  // error_msg.
  376. type NotebookResult struct {
  377. ErrorCode string `json:"error_code"`
  378. ErrorMsg string `json:"error_msg"`
  379. }
  // CreateNotebookResult is a notebook description with error fields:
  // identity, status, timestamps (as strings), the profile and the flavor with
  // its queueing details. Presumably the response to CreateNotebookParams —
  // confirm.
  380. type CreateNotebookResult struct {
  381. ErrorCode string `json:"error_code"`
  382. ErrorMsg string `json:"error_msg"`
  383. ID string `json:"id"`
  384. Name string `json:"name"`
  385. Description string `json:"description"`
  386. Status string `json:"status"`
  387. CreationTimestamp string `json:"creation_timestamp"`
  388. LatestUpdateTimestamp string `json:"latest_update_timestamp"`
  389. Profile struct {
  390. ID string `json:"id"`
  391. Name string `json:"name"`
  392. Description string `json:"description"`
  393. DeType string `json:"de_type"`
  394. FlavorType string `json:"flavor_type"`
  395. } `json:"profile"`
  396. Flavor string `json:"flavor"`
  397. FlavorDetails struct {
  398. Name string `json:"name"`
  399. Status string `json:"status"`
  400. QueuingNum int `json:"queuing_num"`
  401. QueueLeftTime int `json:"queue_left_time"` // in seconds
  402. Duration int `json:"duration"` // auto-stop time, in seconds
  403. } `json:"flavor_details"`
  404. }
  // GetNotebookResult is the detailed notebook description: the fields of
  // CreateNotebookResult plus queueing information and spec annotations.
  // CreateTime, LatestUpdateTime, BeginTime and EndTime have no JSON tag;
  // presumably they are display strings derived from the neighbouring
  // timestamp fields by callers — confirm.
  405. type GetNotebookResult struct {
  406. ErrorCode string `json:"error_code"`
  407. ErrorMsg string `json:"error_msg"`
  408. ID string `json:"id"`
  409. Name string `json:"name"`
  410. Description string `json:"description"`
  411. Status string `json:"status"`
  412. CreationTimestamp string `json:"creation_timestamp"`
  413. CreateTime string
  414. LatestUpdateTimestamp string `json:"latest_update_timestamp"`
  415. LatestUpdateTime string
  416. Profile struct {
  417. ID string `json:"id"`
  418. Name string `json:"name"`
  419. Description string `json:"description"`
  420. DeType string `json:"de_type"`
  421. FlavorType string `json:"flavor_type"`
  422. } `json:"profile"`
  423. Flavor string `json:"flavor"`
  424. FlavorDetails struct {
  425. Name string `json:"name"`
  426. Status string `json:"status"`
  427. QueuingNum int `json:"queuing_num"`
  428. QueueLeftTime int `json:"queue_left_time"` // in seconds
  429. Duration int `json:"duration"` // auto-stop time, in seconds
  430. } `json:"flavor_details"`
  431. QueuingInfo struct {
  432. ID string `json:"id"`
  433. Name string `json:"name"`
  434. Flavor string `json:"flavor"`
  435. DeType string `json:"de_type"`
  436. Status string `json:"status"`
  437. BeginTimestamp int `json:"begin_timestamp"` // time at which the instance entered the queue
  438. BeginTime string
  439. RemainTime int `json:"remain_time"` // remaining time of the instance
  440. EndTimestamp int `json:"end_timestamp"` // NOTE(review): meaning not documented in the original
  441. EndTime string
  442. Rank int `json:"rank"` // rank of the instance in the queue
  443. } `json:"queuing_info"`
  444. Spec struct {
  445. Annotations struct {
  446. TargetDomain string `json:"target_domain"`
  447. Url string `json:"url"`
  448. } `json:"annotations"`
  449. } `json:"spec"`
  450. }
  // GetTokenParams is the root of a nested JSON document:
  // auth -> identity (methods, password -> user -> name/password/domain) and
  // auth -> scope -> project. Presumably the request body of a token call —
  // confirm.
  451. type GetTokenParams struct {
  452. Auth Auth `json:"auth"`
  453. }
  // Auth pairs an Identity with a Scope; it is the value of
  // GetTokenParams.Auth.
  454. type Auth struct {
  455. Identity Identity `json:"identity"`
  456. Scope Scope `json:"scope"`
  457. }
  // Scope wraps a Project under the JSON key "project".
  458. type Scope struct {
  459. Project Project `json:"project"`
  460. }
  // Project identifies a project by name.
  461. type Project struct {
  462. Name string `json:"name"`
  463. }
  // Identity lists authentication method names and carries the password
  // credentials.
  464. type Identity struct {
  465. Methods []string `json:"methods"`
  466. Password Password `json:"password"`
  467. }
  // Password wraps the NotebookUser credentials under the JSON key "user".
  468. type Password struct {
  469. User NotebookUser `json:"user"`
  470. }
  // NotebookUser holds a user name, password and domain. The password is
  // serialized as-is under the key "password", so values of this type should
  // not be logged.
  471. type NotebookUser struct {
  472. Name string `json:"name"`
  473. Password string `json:"password"`
  474. Domain Domain `json:"domain"`
  475. }
  // Domain identifies a domain by name.
  476. type Domain struct {
  477. Name string `json:"name"`
  478. }
  // Untyped string constants naming actions; presumably the values placed in
  // NotebookAction.Action — confirm against callers.
  479. const (
  480. ActionStart = "start"
  481. ActionStop = "stop"
  482. ActionRestart = "restart"
  483. ActionQueue = "queue"
  484. ActionDequeue = "dequeue"
  485. )
  // NotebookAction is a JSON object with a single "action" string.
  486. type NotebookAction struct {
  487. Action string `json:"action"`
  488. }
  // NotebookActionResult carries the error fields plus the current status and
  // the previous state, all as strings.
  489. type NotebookActionResult struct {
  490. ErrorCode string `json:"error_code"`
  491. ErrorMsg string `json:"error_msg"`
  492. CurrentStatus string `json:"current_status"`
  493. PreviousState string `json:"previous_state"`
  494. }
  // NotebookGetJobTokenResult carries the error fields plus a token string.
  495. type NotebookGetJobTokenResult struct {
  496. ErrorCode string `json:"error_code"`
  497. ErrorMsg string `json:"error_msg"`
  498. Token string `json:"token"`
  499. }
  // NotebookDelResult carries only an instance id.
  500. type NotebookDelResult struct {
  501. InstanceID string `json:"instance_id"`
  502. }
  // CreateTrainJobParams is the JSON description of a training job: name,
  // description, configuration and workspace id. Presumably the request body
  // of a train-job creation call — confirm.
  503. type CreateTrainJobParams struct {
  504. JobName string `json:"job_name"`
  505. Description string `json:"job_desc"`
  506. Config Config `json:"config"`
  507. WorkspaceID string `json:"workspace_id"`
  508. }
  // Config is the training-job configuration embedded in
  // CreateTrainJobParams. The commented-out fields are kept from the original
  // as a record of keys that are currently not sent.
  509. type Config struct {
  510. WorkServerNum int `json:"worker_server_num"`
  511. AppUrl string `json:"app_url"` // code directory of the training job
  512. BootFileUrl string `json:"boot_file_url"` // boot file of the training job; must be located under the code directory
  513. Parameter []Parameter `json:"parameter"`
  514. DataUrl string `json:"data_url"` // OBS path URL of the dataset required by the training job
  515. //DatasetID string `json:"dataset_id"`
  516. //DataVersionID string `json:"dataset_version_id"`
  517. //DataSource []DataSource `json:"data_source"`
  518. //SpecID int64 `json:"spec_id"`
  519. EngineID int64 `json:"engine_id"`
  520. //ModelID int64 `json:"model_id"`
  521. TrainUrl string `json:"train_url"` // OBS path URL for the output files of the training job
  522. LogUrl string `json:"log_url"`
  523. //UserImageUrl string `json:"user_image_url"`
  524. //UserCommand string `json:"user_command"`
  525. CreateVersion bool `json:"create_version"`
  526. //Volumes []Volumes `json:"volumes"`
  527. Flavor Flavor `json:"flavor"`
  528. PoolID string `json:"pool_id"`
  529. }
  // CreateConfigParams is a named training configuration: a config name and
  // description followed by the same active fields as Config, except that
  // CreateVersion is commented out here.
  530. type CreateConfigParams struct {
  531. ConfigName string `json:"config_name"`
  532. Description string `json:"config_desc"`
  533. WorkServerNum int `json:"worker_server_num"`
  534. AppUrl string `json:"app_url"` // code directory of the training job
  535. BootFileUrl string `json:"boot_file_url"` // boot file of the training job; must be located under the code directory
  536. Parameter []Parameter `json:"parameter"`
  537. DataUrl string `json:"data_url"` // OBS path URL of the dataset required by the training job
  538. //DatasetID string `json:"dataset_id"`
  539. //DataVersionID string `json:"dataset_version_id"`
  540. //DataSource []DataSource `json:"data_source"`
  541. //SpecID int64 `json:"spec_id"`
  542. EngineID int64 `json:"engine_id"`
  543. //ModelID int64 `json:"model_id"`
  544. TrainUrl string `json:"train_url"` // OBS path URL for the output files of the training job
  545. LogUrl string `json:"log_url"`
  546. //UserImageUrl string `json:"user_image_url"`
  547. //UserCommand string `json:"user_command"`
  548. //CreateVersion bool `json:"create_version"`
  549. //Volumes []Volumes `json:"volumes"`
  550. Flavor Flavor `json:"flavor"`
  551. PoolID string `json:"pool_id"`
  552. }
  // Parameter is a label/value pair, both strings; the element type of the
  // Parameter lists on the training configuration types.
  553. type Parameter struct {
  554. Label string `json:"label"`
  555. Value string `json:"value"`
  556. }
  // Parameters wraps a list of Parameter entries under the JSON key
  // "parameter".
  557. type Parameters struct {
  558. Parameter []Parameter `json:"parameter"`
  559. }
  // DataSource describes a dataset reference: id, version, type and data URL.
  // Within this file it is only referenced from commented-out fields.
  560. type DataSource struct {
  561. DatasetID string `json:"dataset_id"`
  562. DatasetVersion string `json:"dataset_version"`
  563. Type string `json:"type"`
  564. DataUrl string `json:"data_url"`
  565. }
  // Volumes pairs an Nfs mount with a HostPath mount. Within this file it is
  // only referenced from commented-out fields. Not to be confused with Volume,
  // which wraps an StHostPath.
  566. type Volumes struct {
  567. Nfs Nfs `json:"nfs"`
  568. HostPath HostPath `json:"host_path"`
  569. }
  // Nfs describes an NFS mount: id, source path, destination path and a
  // read-only flag.
  570. type Nfs struct {
  571. ID string `json:"id"`
  572. SourcePath string `json:"src_path"`
  573. DestPath string `json:"dest_path"`
  574. ReadOnly bool `json:"read_only"`
  575. }
  // HostPath describes a host-path mount with snake_case JSON keys: source
  // path, destination path and a read-only flag. StHostPath is the camelCase
  // counterpart used by Volume.
  576. type HostPath struct {
  577. SourcePath string `json:"src_path"`
  578. DestPath string `json:"dest_path"`
  579. ReadOnly bool `json:"read_only"`
  580. }
  // Flavor identifies a flavor by its code.
  581. type Flavor struct {
  582. Code string `json:"code"`
  583. }
  // CreateTrainJobResult carries the error fields and success flag plus the
  // identity of a training job: name, numeric job id, status, creation time,
  // version id/name and resource id. Presumably the response to
  // CreateTrainJobParams — confirm.
  584. type CreateTrainJobResult struct {
  585. ErrorCode string `json:"error_code"`
  586. ErrorMsg string `json:"error_msg"`
  587. IsSuccess bool `json:"is_success"`
  588. JobName string `json:"job_name"`
  589. JobID int64 `json:"job_id"`
  590. Status int `json:"status"`
  591. CreateTime int64 `json:"create_time"`
  592. VersionID int64 `json:"version_id"`
  593. ResourceID string `json:"resource_id"`
  594. VersionName string `json:"version_name"`
  595. }
  // CreateTrainJobConfigResult carries only the error fields and the success
  // flag.
  596. type CreateTrainJobConfigResult struct {
  597. ErrorCode string `json:"error_code"`
  598. ErrorMsg string `json:"error_msg"`
  599. IsSuccess bool `json:"is_success"`
  600. }
  // GetResourceSpecsResult carries the error fields and success flag plus a
  // total count and the list of Specs.
  601. type GetResourceSpecsResult struct {
  602. ErrorCode string `json:"error_code"`
  603. ErrorMsg string `json:"error_msg"`
  604. IsSuccess bool `json:"is_success"`
  605. SpecTotalCount int `json:"spec_total_count"`
  606. Specs []Specs `json:"specs"`
  607. }
  // Specs is one entry of GetResourceSpecsResult.Specs. Despite the plural
  // name it describes a single specification: core/CPU/storage as strings, GPU
  // type and count, spec id and code, and a few numeric limits.
  608. type Specs struct {
  609. Core string `json:"core"`
  610. Cpu string `json:"cpu"`
  611. IsNoResource bool `json:"no_resource"`
  612. GpuType string `json:"gpu_type"`
  613. SpecID int64 `json:"spec_id"`
  614. GpuNum int `json:"gpu_num"`
  615. SpecCode string `json:"spec_code"`
  616. Storage string `json:"storage"`
  617. MaxNum int `json:"max_num"`
  618. UnitNum int `json:"unit_num"`
  619. InterfaceType int `json:"interface_type"`
  620. }
  // GetConfigListResult carries the error fields and success flag plus a total
  // count and the list of ParaConfig entries (JSON key "configs").
  621. type GetConfigListResult struct {
  622. ErrorCode string `json:"error_code"`
  623. ErrorMsg string `json:"error_msg"`
  624. IsSuccess bool `json:"is_success"`
  625. ConfigTotalCount int `json:"config_total_count"`
  626. ParaConfigs []ParaConfig `json:"configs"`
  627. }
  // ParaConfig is the summary of one stored configuration: name, description,
  // creation time and engine details.
  628. type ParaConfig struct {
  629. ConfigName string `json:"config_name"`
  630. ConfigDesc string `json:"config_desc"`
  631. CreateTime int64 `json:"create_time"`
  632. EngineType int `json:"engine_type"`
  633. EngineName string `json:"engine_name"`
  634. EngineId int64 `json:"engine_id"`
  635. EngineVersion string `json:"engine_version"`
  636. UserImageUrl string `json:"user_image_url"`
  637. UserCommand string `json:"user_command"`
  // Result has no JSON tag; presumably the full configuration is fetched
  // separately and attached here by callers — confirm.
  638. Result GetConfigResult
  639. }
  // GetConfigResult is a stored configuration in full: the error fields and
  // success flag followed by the same active fields as CreateConfigParams.
  640. type GetConfigResult struct {
  641. ErrorCode string `json:"error_code"`
  642. ErrorMsg string `json:"error_msg"`
  643. IsSuccess bool `json:"is_success"`
  644. ConfigName string `json:"config_name"`
  645. Description string `json:"config_desc"`
  646. WorkServerNum int `json:"worker_server_num"`
  647. AppUrl string `json:"app_url"` // code directory of the training job
  648. BootFileUrl string `json:"boot_file_url"` // boot file of the training job; must be located under the code directory
  649. Parameter []Parameter `json:"parameter"`
  650. DataUrl string `json:"data_url"` // OBS path URL of the dataset required by the training job
  651. //DatasetID string `json:"dataset_id"`
  652. //DataVersionID string `json:"dataset_version_id"`
  653. //DataSource []DataSource `json:"data_source"`
  654. //SpecID int64 `json:"spec_id"`
  655. EngineID int64 `json:"engine_id"`
  656. //ModelID int64 `json:"model_id"`
  657. TrainUrl string `json:"train_url"` // OBS path URL for the output files of the training job
  658. LogUrl string `json:"log_url"`
  659. //UserImageUrl string `json:"user_image_url"`
  660. //UserCommand string `json:"user_command"`
  661. //CreateVersion bool `json:"create_version"`
  662. //Volumes []Volumes `json:"volumes"`
  663. Flavor Flavor `json:"flavor"`
  664. PoolID string `json:"pool_id"`
  665. }
  // ErrorResult carries an error code, an error message and a success flag.
  // NOTE(review): the message is read from the key "error_message", whereas
  // the other result types in this file use "error_msg" — confirm this
  // difference is intentional.
  666. type ErrorResult struct {
  667. ErrorCode string `json:"error_code"`
  668. ErrorMsg string `json:"error_message"`
  669. IsSuccess bool `json:"is_success"`
  670. }
  // GetTrainJobResult is the detailed description of a training job version.
  // Status, CreateTime, TrainJobDuration and DatasetName have no JSON tag;
  // presumably they are display values derived by callers (e.g. Status from
  // IntStatus, CreateTime from LongCreateTime) — confirm.
  671. type GetTrainJobResult struct {
  672. IsSuccess bool `json:"is_success"`
  673. JobName string `json:"job_name"`
  674. JobID int64 `json:"job_id"`
  675. Description string `json:"job_desc"`
  676. IntStatus int `json:"status"`
  677. Status string
  678. LongCreateTime int64 `json:"create_time"`
  679. CreateTime string
  680. Duration int64 `json:"duration"` // running time of the training job, in milliseconds
  681. TrainJobDuration string // running time of the training job, formatted as hh:mm:ss
  682. VersionID int64 `json:"version_id"`
  683. ResourceID string `json:"resource_id"`
  684. VersionName string `json:"version_name"`
  685. PreVersionID int64 `json:"pre_version_id"`
  686. WorkServerNum int `json:"worker_server_num"`
  687. AppUrl string `json:"app_url"` // code directory of the training job
  688. BootFileUrl string `json:"boot_file_url"` // boot file of the training job; must be located under the code directory
  689. Parameter []Parameter `json:"parameter"`
  690. DataUrl string `json:"data_url"` // OBS path URL of the dataset required by the training job
  691. //DatasetID string `json:"dataset_id"`
  692. //DataVersionID string `json:"dataset_version_id"`
  693. //DataSource []DataSource `json:"data_source"`
  694. //SpecID int64 `json:"spec_id"`
  695. EngineID int64 `json:"engine_id"`
  696. EngineName string `json:"engine_name"`
  697. EngineVersion string `json:"engine_version"`
  698. //ModelID int64 `json:"model_id"`
  699. TrainUrl string `json:"train_url"` // OBS path URL for the output files of the training job
  700. LogUrl string `json:"log_url"`
  701. //UserImageUrl string `json:"user_image_url"`
  702. //UserCommand string `json:"user_command"`
  703. //Volumes []Volumes `json:"volumes"`
  704. Flavor Flavor `json:"flavor"`
  705. PoolID string `json:"pool_id"`
  706. PoolName string `json:"pool_name"`
  707. NasMountPath string `json:"nas_mount_path"`
  708. NasShareAddr string `json:"nas_share_addr"`
  709. DatasetName string
  710. }
  // GetTrainJobLogResult carries the error fields and success flag plus a
  // chunk of log text: the content, its line count, and the start and end line
  // markers (both strings).
  711. type GetTrainJobLogResult struct {
  712. ErrorCode string `json:"error_code"`
  713. ErrorMsg string `json:"error_msg"`
  714. IsSuccess bool `json:"is_success"`
  715. Content string `json:"content"`
  716. Lines int `json:"lines"`
  717. StartLine string `json:"start_line"`
  718. EndLine string `json:"end_line"`
  719. }
  720. type GetTrainJobLogFileNamesResult struct {
  721. ErrorCode string `json:"error_code"`
  722. ErrorMsg string `json:"error_msg"`
  723. IsSuccess bool `json:"is_success"`
  724. LogFileList []string `json:"log_file_list"`
  725. }
  726. type TrainJobResult struct {
  727. ErrorCode string `json:"error_code"`
  728. ErrorMsg string `json:"error_msg"`
  729. IsSuccess bool `json:"is_success"`
  730. }
  731. type LogFile struct {
  732. Name string
  733. }
  734. func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
  735. sess := x.NewSession()
  736. defer sess.Close()
  737. var cond = builder.NewCond()
  738. if opts.RepoID > 0 {
  739. cond = cond.And(
  740. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  741. )
  742. }
  743. if opts.UserID > 0 {
  744. cond = cond.And(
  745. builder.Eq{"cloudbrain.user_id": opts.UserID},
  746. )
  747. }
  748. if (opts.JobID) > 0 {
  749. cond = cond.And(
  750. builder.Eq{"cloudbrain.job_id": opts.JobID},
  751. )
  752. }
  753. if (opts.Type) >= 0 {
  754. cond = cond.And(
  755. builder.Eq{"cloudbrain.type": opts.Type},
  756. )
  757. }
  758. if (opts.JobType) != "" {
  759. cond = cond.And(
  760. builder.Eq{"cloudbrain.job_type": opts.JobType},
  761. )
  762. }
  763. // switch opts.JobStatus {
  764. // case JobWaiting:
  765. // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
  766. // case JobFailed:
  767. // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
  768. // case JobStopped:
  769. // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
  770. // case JobSucceeded:
  771. // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
  772. // }
  773. if len(opts.CloudbrainIDs) > 0 {
  774. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  775. }
  776. count, err := sess.Where(cond).Count(new(Cloudbrain))
  777. if err != nil {
  778. return nil, 0, fmt.Errorf("Count: %v", err)
  779. }
  780. if opts.Page >= 0 && opts.PageSize > 0 {
  781. var start int
  782. if opts.Page == 0 {
  783. start = 0
  784. } else {
  785. start = (opts.Page - 1) * opts.PageSize
  786. }
  787. sess.Limit(opts.PageSize, start)
  788. }
  789. sess.OrderBy("cloudbrain.created_unix DESC")
  790. cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum)
  791. if err := sess.Table(&Cloudbrain{}).Where(cond).
  792. Join("left", "`user`", "cloudbrain.user_id = `user`.id").
  793. Find(&cloudbrains); err != nil {
  794. return nil, 0, fmt.Errorf("Find: %v", err)
  795. }
  796. sess.Close()
  797. return cloudbrains, count, nil
  798. }
  799. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  800. if _, err = x.Insert(cloudbrain); err != nil {
  801. return err
  802. }
  803. return nil
  804. }
  805. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  806. has, err := x.Get(cb)
  807. if err != nil {
  808. return nil, err
  809. } else if !has {
  810. return nil, ErrJobNotExist{}
  811. }
  812. return cb, nil
  813. }
  814. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  815. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  816. return getRepoCloudBrain(cb)
  817. }
  818. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  819. cb := &Cloudbrain{JobID: jobID}
  820. return getRepoCloudBrain(cb)
  821. }
  822. func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) {
  823. cloudBrains := make([]*Cloudbrain, 0)
  824. err := x.Cols("job_id", "status", "type").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains)
  825. return cloudBrains, err
  826. }
  827. func GetCloudbrainsNeededStopByRepoID(repoID int64) ([]*Cloudbrain, error) {
  828. cloudBrains := make([]*Cloudbrain, 0)
  829. err := x.Cols("job_id", "status", "type").Where("repo_id=? AND status !=?", repoID, string(JobStopped)).Find(&cloudBrains)
  830. return cloudBrains, err
  831. }
  832. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  833. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  834. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  835. return
  836. }
  837. func SetTrainJobStatusByJobID(jobID string, status string, duration int64, trainjobduration string) (err error) {
  838. cb := &Cloudbrain{JobID: jobID, Status: string(status), Duration: duration, TrainJobDuration: trainjobduration}
  839. _, err = x.Cols("status", "duration", "train_job_duration").Where("cloudbrain.job_id=?", jobID).Update(cb)
  840. return
  841. }
  842. func UpdateJob(job *Cloudbrain) error {
  843. return updateJob(x, job)
  844. }
  845. func updateJob(e Engine, job *Cloudbrain) error {
  846. var sess *xorm.Session
  847. sess = e.Where("job_id = ?", job.JobID)
  848. _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
  849. return err
  850. }
  851. // func UpdateTrainJob(job *CloudbrainInfo) error {
  852. // return updateTrainJob(x, job)
  853. // }
  854. // func updateTrainJob(e Engine, job *CloudbrainInfo) error {
  855. // var sess *xorm.Session
  856. // sess = e.Where("job_id = ?", job.Cloudbrain.JobID)
  857. // _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
  858. // return err
  859. // }
  860. func DeleteJob(job *Cloudbrain) error {
  861. return deleteJob(x, job)
  862. }
  863. func deleteJob(e Engine, job *Cloudbrain) error {
  864. _, err := e.ID(job.ID).Delete(job)
  865. return err
  866. }
  867. func GetCloudbrainByName(jobName string) (*Cloudbrain, error) {
  868. cb := &Cloudbrain{JobName: jobName}
  869. return getRepoCloudBrain(cb)
  870. }
  871. func CanDelJob(isSigned bool, user *User, job *CloudbrainInfo) bool {
  872. if !isSigned || (job.Status != string(JobStopped) && job.Status != string(JobFailed) && job.Status != string(ModelArtsStartFailed) && job.Status != string(ModelArtsCreateFailed)) {
  873. return false
  874. }
  875. repo, err := GetRepositoryByID(job.RepoID)
  876. if err != nil {
  877. log.Error("GetRepositoryByID failed:%v", err.Error())
  878. return false
  879. }
  880. permission, _ := GetUserRepoPermission(repo, user)
  881. if err != nil {
  882. log.Error("GetUserRepoPermission failed:%v", err.Error())
  883. return false
  884. }
  885. if (user.ID == job.UserID && permission.AccessMode >= AccessModeWrite) || user.IsAdmin || permission.AccessMode >= AccessModeAdmin {
  886. return true
  887. }
  888. return false
  889. }