You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 11 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago

  1. package models
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "time"
  7. "xorm.io/xorm"
  8. "code.gitea.io/gitea/modules/setting"
  9. "code.gitea.io/gitea/modules/timeutil"
  10. "xorm.io/builder"
  11. )
  // CloudbrainStatus is the string representation of a cloudbrain job state.
  type CloudbrainStatus string

  // JobType is the string representation of a cloudbrain job kind.
  type JobType string

  const (
  	// Job states. Cloudbrain.Status stores these as plain strings
  	// (see SetCloudbrainStatusByJobID, which converts with string(status)).
  	JobWaiting CloudbrainStatus = "WAITING"
  	JobStopped CloudbrainStatus = "STOPPED"
  	JobSucceeded CloudbrainStatus = "SUCCEEDED"
  	JobFailed CloudbrainStatus = "FAILED"
  	JobRunning CloudbrainStatus = "RUNNING"

  	// Job kinds. "DEBUG" is also the column default declared on Cloudbrain.JobType.
  	JobTypeDebug JobType = "DEBUG"
  	JobTypeBenchmark JobType = "BENCHMARK"
  )
  // Cloudbrain is the xorm-mapped record of one cloudbrain job.
  type Cloudbrain struct {
  	ID int64 `xorm:"pk autoincr"`
  	// JobID is the job identifier used by the lookup/update helpers in this file
  	// (the job_id column).
  	JobID string `xorm:"INDEX NOT NULL"`
  	// JobType is stored as a plain string; the column defaults to 'DEBUG'.
  	JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
  	JobName string `xorm:"INDEX"`
  	// Status is stored as a plain string; SetCloudbrainStatusByJobID writes
  	// CloudbrainStatus values into it.
  	Status string `xorm:"INDEX"`
  	UserID int64 `xorm:"INDEX"`
  	RepoID int64 `xorm:"INDEX"`
  	SubTaskName string `xorm:"INDEX"`
  	ContainerID string
  	ContainerIp string
  	// CreatedUnix/UpdatedUnix carry xorm's `created`/`updated` tags.
  	CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
  	UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  	// DeletedAt carries xorm's `deleted` tag.
  	DeletedAt time.Time `xorm:"deleted"`

  	// The fields below are tagged `xorm:"-"` and therefore are not mapped to columns.
  	CanDebug bool `xorm:"-"`
  	User *User `xorm:"-"`
  	Repo *Repository `xorm:"-"`
  }
  // CloudBrainLoginResult holds a code/message/payload triple.
  // Unlike the other *Result types in this file it declares no JSON tags, so
  // encoding/json would use the Go field names as keys.
  // NOTE(review): presumably the response of the cloudbrain login call — confirm against the caller.
  type CloudBrainLoginResult struct {
  	Code string
  	Msg string
  	Payload map[string]interface{}
  }
  // TaskRole is one entry of CreateJobParams.TaskRoles. Each field is encoded
  // under the camelCase JSON key given in its tag.
  type TaskRole struct {
  	Name string `json:"name"`
  	// The counters below are int8, so they hold values in [-128, 127].
  	TaskNumber int8 `json:"taskNumber"`
  	MinSucceededTaskCount int8 `json:"minSucceededTaskCount"`
  	MinFailedTaskCount int8 `json:"minFailedTaskCount"`
  	CPUNumber int8 `json:"cpuNumber"`
  	GPUNumber int8 `json:"gpuNumber"`
  	MemoryMB int `json:"memoryMB"`
  	ShmMB int `json:"shmMB"`
  	Command string `json:"command"`
  	NeedIBDevice bool `json:"needIBDevice"`
  	IsMainRole bool `json:"isMainRole"`
  	UseNNI bool `json:"useNNI"`
  }
  // StHostPath pairs a path with a mount path and a read-only flag.
  // NOTE(review): presumably Path is on the host and MountPath is inside the
  // container — confirm against the cloudbrain API.
  type StHostPath struct {
  	Path string `json:"path"`
  	MountPath string `json:"mountPath"`
  	ReadOnly bool `json:"readOnly"`
  }

  // Volume is one entry of CreateJobParams.Volumes; it wraps a StHostPath under
  // the "hostPath" JSON key.
  type Volume struct {
  	HostPath StHostPath `json:"hostPath"`
  }
  // CreateJobParams is the JSON-encodable description of a job: its name, retry
  // count, GPU type, image, task roles and volumes.
  // NOTE(review): presumably the request body of the job-creation call — confirm against the caller.
  type CreateJobParams struct {
  	JobName string `json:"jobName"`
  	RetryCount int8 `json:"retryCount"`
  	GpuType string `json:"gpuType"`
  	Image string `json:"image"`
  	TaskRoles []TaskRole `json:"taskRoles"`
  	Volumes []Volume `json:"volumes"`
  }
  // CreateJobResult is a code/msg/payload envelope decoded from JSON; the payload
  // is kept as an untyped map.
  type CreateJobResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  // GetJobResult is a code/msg/payload envelope decoded from JSON. Its untyped
  // Payload has the map shape accepted by ConvertToJobResultPayload and
  // ConvertToTaskPod.
  type GetJobResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  // GetImagesResult is a code/msg/payload envelope decoded from JSON whose
  // payload maps string keys to ImageInfo entries.
  type GetImagesResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  	Payload map[string]*ImageInfo `json:"payload"`
  }
  // CloudbrainsOptions are the filter and paging options consumed by Cloudbrains.
  // A filter is applied only when its value is > 0 (or, for CloudbrainIDs, non-empty).
  type CloudbrainsOptions struct {
  	// ListOptions is embedded; Cloudbrains reads Page and PageSize from it.
  	ListOptions
  	RepoID int64 // include all repos if empty
  	UserID int64
  	// NOTE(review): JobID is an int64 here while Cloudbrain.JobID is a string;
  	// Cloudbrains compares it against the job_id column as-is — confirm this is intended.
  	JobID int64
  	// SortType is not read by Cloudbrains, which always orders by created_unix DESC.
  	SortType string
  	CloudbrainIDs []int64
  	// JobStatus CloudbrainStatus
  }
  // TaskPod is the typed view of a task-role payload: the role's name plus the
  // status of each of its tasks.
  type TaskPod struct {
  	TaskRoleStatus struct {
  		Name string `json:"name"`
  	} `json:"taskRoleStatus"`
  	TaskStatuses []struct {
  		TaskIndex int `json:"taskIndex"`
  		PodUID string `json:"podUid"`
  		PodIP string `json:"podIp"`
  		PodName string `json:"podName"`
  		ContainerID string `json:"containerId"`
  		ContainerIP string `json:"containerIp"`
  		ContainerGpus string `json:"containerGpus"`
  		State string `json:"state"`
  		StartAt time.Time `json:"startAt"`
  		FinishedAt time.Time `json:"finishedAt"`
  		ExitCode int `json:"exitCode"`
  		ExitDiagnostics string `json:"exitDiagnostics"`
  		RetriedCount int `json:"retriedCount"`
  		// StartTime and FinishedTime are display strings derived from StartAt and
  		// FinishedAt by ConvertToTaskPod.
  		StartTime string
  		FinishedTime string
  	} `json:"taskStatuses"`
  }

  // TaskInfo carries a user name, a task name and a code name under snake_case
  // JSON keys.
  type TaskInfo struct {
  	Username string `json:"username"`
  	TaskName string `json:"task_name"`
  	CodeName string `json:"code_name"`
  }

  // ConvertToTaskPod converts an untyped payload map into a TaskPod by
  // round-tripping it through JSON, then fills StartTime and FinishedTime of
  // every task status with the corresponding timestamp shifted by +8 hours and
  // formatted as "2006-01-02 15:04:05".
  //
  // It returns an error (and a zero TaskPod) when the map cannot be encoded,
  // when the JSON does not fit TaskPod, or when the payload contains no task
  // statuses. The last case used to panic with an index-out-of-range.
  func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  	data, err := json.Marshal(input)
  	if err != nil {
  		return TaskPod{}, fmt.Errorf("marshal task pod payload: %w", err)
  	}

  	var taskPod TaskPod
  	if err = json.Unmarshal(data, &taskPod); err != nil {
  		return TaskPod{}, fmt.Errorf("unmarshal task pod payload: %w", err)
  	}
  	if len(taskPod.TaskStatuses) == 0 {
  		return TaskPod{}, errors.New("task pod payload has no task statuses")
  	}

  	const (
  		layout = "2006-01-02 15:04:05"
  		// The display times are rendered at a fixed +8h offset from UTC.
  		offsetSeconds = 8 * 3600
  	)
  	for i := range taskPod.TaskStatuses {
  		st := &taskPod.TaskStatuses[i]
  		st.StartTime = time.Unix(st.StartAt.Unix()+offsetSeconds, 0).UTC().Format(layout)
  		st.FinishedTime = time.Unix(st.FinishedAt.Unix()+offsetSeconds, 0).UTC().Format(layout)
  	}
  	return taskPod, nil
  }
  // JobResultPayload is the typed view of a job payload: identity, job status,
  // task roles, resource summary, the submitted config and user information.
  type JobResultPayload struct {
  	ID string `json:"id"`
  	Name string `json:"name"`
  	Platform string `json:"platform"`
  	JobStatus struct {
  		Username string `json:"username"`
  		State string `json:"state"`
  		SubState string `json:"subState"`
  		ExecutionType string `json:"executionType"`
  		Retries int `json:"retries"`
  		// CreatedTime and CompletedTime are divided by 1000 before being handed
  		// to time.Unix in ConvertToJobResultPayload.
  		// NOTE(review): presumably epoch milliseconds — confirm against the API.
  		CreatedTime int64 `json:"createdTime"`
  		CompletedTime int64 `json:"completedTime"`
  		AppID string `json:"appId"`
  		AppProgress string `json:"appProgress"`
  		AppTrackingURL string `json:"appTrackingUrl"`
  		AppLaunchedTime int64 `json:"appLaunchedTime"`
  		AppCompletedTime interface{} `json:"appCompletedTime"`
  		AppExitCode int `json:"appExitCode"`
  		AppExitDiagnostics string `json:"appExitDiagnostics"`
  		AppExitType interface{} `json:"appExitType"`
  		VirtualCluster string `json:"virtualCluster"`
  		// StartTime and EndTime are display strings derived from CreatedTime and
  		// CompletedTime by ConvertToJobResultPayload.
  		StartTime string
  		EndTime string
  	} `json:"jobStatus"`
  	// TaskRoles is left untyped. Its values are maps.
  	// NOTE(review): presumably each value is what ConvertToTaskPod expects — confirm against the caller.
  	TaskRoles map[string]interface{} `json:"taskRoles"`
  	Resource struct {
  		CPU int `json:"cpu"`
  		Memory string `json:"memory"`
  		NvidiaComGpu int `json:"nvidia.com/gpu"`
  	} `json:"resource"`
  	Config struct {
  		Image string `json:"image"`
  		JobID string `json:"jobId"`
  		GpuType string `json:"gpuType"`
  		JobName string `json:"jobName"`
  		JobType string `json:"jobType"`
  		TaskRoles []struct {
  			Name string `json:"name"`
  			ShmMB int `json:"shmMB"`
  			Command string `json:"command"`
  			MemoryMB int `json:"memoryMB"`
  			CPUNumber int `json:"cpuNumber"`
  			GpuNumber int `json:"gpuNumber"`
  			IsMainRole bool `json:"isMainRole"`
  			TaskNumber int `json:"taskNumber"`
  			NeedIBDevice bool `json:"needIBDevice"`
  			MinFailedTaskCount int `json:"minFailedTaskCount"`
  			MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  		} `json:"taskRoles"`
  		RetryCount int `json:"retryCount"`
  	} `json:"config"`
  	Userinfo struct {
  		User string `json:"user"`
  		OrgID string `json:"org_id"`
  	} `json:"userinfo"`
  }

  // ConvertToJobResultPayload converts an untyped payload map into a
  // JobResultPayload by round-tripping it through JSON, then fills
  // JobStatus.StartTime and JobStatus.EndTime from CreatedTime and CompletedTime
  // (each divided by 1000), formatted as "2006-01-02 15:04:05" in the process's
  // local time zone.
  //
  // It returns an error (and a zero JobResultPayload) when the map cannot be
  // encoded or when the JSON does not fit JobResultPayload. The derived time
  // strings are only computed after a successful decode.
  func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  	data, err := json.Marshal(input)
  	if err != nil {
  		return JobResultPayload{}, fmt.Errorf("marshal job result payload: %w", err)
  	}

  	var jobResultPayload JobResultPayload
  	if err = json.Unmarshal(data, &jobResultPayload); err != nil {
  		return JobResultPayload{}, fmt.Errorf("unmarshal job result payload: %w", err)
  	}

  	const layout = "2006-01-02 15:04:05"
  	status := &jobResultPayload.JobStatus
  	status.StartTime = time.Unix(status.CreatedTime/1000, 0).Format(layout)
  	status.EndTime = time.Unix(status.CompletedTime/1000, 0).Format(layout)
  	return jobResultPayload, nil
  }
  // ImagesResultPayload wraps a list of image descriptions.
  // NOTE(review): Images is tagged `json:"taskStatuses"`, the same key TaskPod
  // uses for its statuses; this looks copy-pasted — confirm the intended key.
  type ImagesResultPayload struct {
  	Images []struct {
  		ID int `json:"id"`
  		Name string `json:"name"`
  		Place string `json:"place"`
  		Description string `json:"description"`
  		Provider string `json:"provider"`
  		Createtime string `json:"createtime"`
  		Remark string `json:"remark"`
  	} `json:"taskStatuses"`
  }
  // ImageInfo describes one image; it is the value type of GetImagesResult.Payload.
  type ImageInfo struct {
  	ID int `json:"id"`
  	Name string `json:"name"`
  	Place string `json:"place"`
  	Description string `json:"description"`
  	Provider string `json:"provider"`
  	Createtime string `json:"createtime"`
  	Remark string `json:"remark"`
  	// PlaceView has no JSON tag.
  	// NOTE(review): presumably a display form of Place filled in by callers — confirm.
  	PlaceView string
  }
  // CommitImageParams is the JSON-encodable set of values for committing an
  // image: an IP, a task container ID, and the tag and description for the image.
  type CommitImageParams struct {
  	Ip string `json:"ip"`
  	TaskContainerId string `json:"taskContainerId"`
  	ImageTag string `json:"imageTag"`
  	ImageDescription string `json:"imageDescription"`
  }
  // CommitImageResult is a code/msg/payload envelope decoded from JSON; the
  // payload is kept as an untyped map.
  type CommitImageResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  	Payload map[string]interface{} `json:"payload"`
  }
  // StopJobResult is a code/msg envelope decoded from JSON; unlike the other
  // result types in this file it has no payload.
  type StopJobResult struct {
  	Code string `json:"code"`
  	Msg string `json:"msg"`
  }
  235. func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) {
  236. sess := x.NewSession()
  237. defer sess.Close()
  238. var cond = builder.NewCond()
  239. if opts.RepoID > 0 {
  240. cond = cond.And(
  241. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  242. )
  243. }
  244. if opts.UserID > 0 {
  245. cond = cond.And(
  246. builder.Eq{"cloudbrain.user_id": opts.UserID},
  247. )
  248. }
  249. if (opts.JobID) > 0 {
  250. cond = cond.And(
  251. builder.Eq{"cloudbrain.job_id": opts.JobID},
  252. )
  253. }
  254. // switch opts.JobStatus {
  255. // case JobWaiting:
  256. // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
  257. // case JobFailed:
  258. // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
  259. // case JobStopped:
  260. // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
  261. // case JobSucceeded:
  262. // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
  263. // }
  264. if len(opts.CloudbrainIDs) > 0 {
  265. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  266. }
  267. count, err := sess.Where(cond).Count(new(Cloudbrain))
  268. if err != nil {
  269. return nil, 0, fmt.Errorf("Count: %v", err)
  270. }
  271. if opts.Page >= 0 && opts.PageSize > 0 {
  272. var start int
  273. if opts.Page == 0 {
  274. start = 0
  275. } else {
  276. start = (opts.Page - 1) * opts.PageSize
  277. }
  278. sess.Limit(opts.PageSize, start)
  279. }
  280. sess.OrderBy("cloudbrain.created_unix DESC")
  281. cloudbrains := make([]*Cloudbrain, 0, setting.UI.IssuePagingNum)
  282. if err := sess.Where(cond).Find(&cloudbrains); err != nil {
  283. return nil, 0, fmt.Errorf("Find: %v", err)
  284. }
  285. sess.Close()
  286. return cloudbrains, count, nil
  287. }
  288. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  289. if _, err = x.Insert(cloudbrain); err != nil {
  290. return err
  291. }
  292. return nil
  293. }
  294. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  295. has, err := x.Get(cb)
  296. if err != nil {
  297. return nil, err
  298. } else if !has {
  299. return nil, errors.New("cloudbrain task is not found")
  300. }
  301. return cb, nil
  302. }
  303. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  304. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  305. return getRepoCloudBrain(cb)
  306. }
  307. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  308. cb := &Cloudbrain{JobID: jobID}
  309. return getRepoCloudBrain(cb)
  310. }
  311. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  312. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  313. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  314. return
  315. }
  316. func UpdateJob(job *Cloudbrain) error {
  317. return updateJob(x, job)
  318. }
  319. func updateJob(e Engine, job *Cloudbrain) error {
  320. var sess *xorm.Session
  321. sess = e.Where("job_id = ?", job.JobID)
  322. _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
  323. return err
  324. }
  325. func DeleteJob(job *Cloudbrain) error {
  326. return deleteJob(x, job)
  327. }
  328. func deleteJob(e Engine, job *Cloudbrain) error {
  329. _, err := e.ID(job.ID).Delete(job)
  330. return err
  331. }