You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 9.5 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. package models
  2. import (
  3. "code.gitea.io/gitea/modules/log"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "time"
  8. "code.gitea.io/gitea/modules/setting"
  9. "code.gitea.io/gitea/modules/timeutil"
  10. "xorm.io/builder"
  11. )
  12. type CloudbrainStatus string
  13. const (
  14. JobWaiting CloudbrainStatus = "WAITING"
  15. JobStopped CloudbrainStatus = "STOPPED"
  16. JobSucceeded CloudbrainStatus = "SUCCEEDED"
  17. JobFailed CloudbrainStatus = "FAILED"
  18. )
  19. type Cloudbrain struct {
  20. ID int64 `xorm:"pk autoincr"`
  21. JobID string `xorm:"INDEX NOT NULL"`
  22. JobName string
  23. Status string `xorm:"INDEX"`
  24. UserID int64 `xorm:"INDEX"`
  25. RepoID int64 `xorm:"INDEX"`
  26. SubTaskName string `xorm:"INDEX"`
  27. CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
  28. UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
  29. User *User `xorm:"-"`
  30. Repo *Repository `xorm:"-"`
  31. }
  32. type CloudBrainLoginResult struct {
  33. Code string
  34. Msg string
  35. Payload map[string]interface{}
  36. }
  37. type TaskRole struct {
  38. Name string `json:"name"`
  39. TaskNumber int8 `json:"taskNumber"`
  40. MinSucceededTaskCount int8 `json:"minSucceededTaskCount"`
  41. MinFailedTaskCount int8 `json:"minFailedTaskCount"`
  42. CPUNumber int8 `json:"cpuNumber"`
  43. GPUNumber int8 `json:"gpuNumber"`
  44. MemoryMB int `json:"memoryMB"`
  45. ShmMB int `json:"shmMB"`
  46. Command string `json:"command"`
  47. NeedIBDevice bool `json:"needIBDevice"`
  48. IsMainRole bool `json:"isMainRole"`
  49. UseNNI bool `json:"useNNI"`
  50. }
  51. type StHostPath struct {
  52. Path string `json:"path"`
  53. MountPath string `json:"mountPath"`
  54. ReadOnly bool `json:"readOnly"`
  55. }
  56. type Volume struct {
  57. HostPath StHostPath `json:"hostPath"`
  58. }
  59. type CreateJobParams struct {
  60. JobName string `json:"jobName"`
  61. RetryCount int8 `json:"retryCount"`
  62. GpuType string `json:"gpuType"`
  63. Image string `json:"image"`
  64. TaskRoles []TaskRole `json:"taskRoles"`
  65. Volumes []Volume `json:"volumes"`
  66. }
  67. type CreateJobResult struct {
  68. Code string
  69. Msg string
  70. Payload map[string]interface{}
  71. }
  72. type GetJobResult struct {
  73. Code string `json:"code"`
  74. Msg string `json:"msg"`
  75. Payload map[string]interface{} `json:"payload"`
  76. }
  77. type GetImagesResult struct {
  78. Code string `json:"code"`
  79. Msg string `json:"msg"`
  80. Payload map[string]ImageInfo `json:"payload"`
  81. }
  82. type CloudbrainsOptions struct {
  83. ListOptions
  84. RepoID int64 // include all repos if empty
  85. UserID int64
  86. JobID int64
  87. SortType string
  88. CloudbrainIDs []int64
  89. // JobStatus CloudbrainStatus
  90. }
  91. type TaskPod struct {
  92. TaskRoleStatus struct {
  93. Name string `json:"name"`
  94. } `json:"taskRoleStatus"`
  95. TaskStatuses []struct {
  96. TaskIndex int `json:"taskIndex"`
  97. PodUID string `json:"podUid"`
  98. PodIP string `json:"podIp"`
  99. PodName string `json:"podName"`
  100. ContainerID string `json:"containerId"`
  101. ContainerIP string `json:"containerIp"`
  102. ContainerGpus string `json:"containerGpus"`
  103. State string `json:"state"`
  104. StartAt time.Time `json:"startAt"`
  105. FinishedAt time.Time `json:"finishedAt"`
  106. ExitCode int `json:"exitCode"`
  107. ExitDiagnostics string `json:"exitDiagnostics"`
  108. RetriedCount int `json:"retriedCount"`
  109. } `json:"taskStatuses"`
  110. }
  111. func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
  112. data, _ := json.Marshal(input)
  113. var taskPod TaskPod
  114. err := json.Unmarshal(data, &taskPod)
  115. return taskPod, err
  116. }
  117. type JobResultPayload struct {
  118. ID string `json:"id"`
  119. Name string `json:"name"`
  120. Platform string `json:"platform"`
  121. JobStatus struct {
  122. Username string `json:"username"`
  123. State string `json:"state"`
  124. SubState string `json:"subState"`
  125. ExecutionType string `json:"executionType"`
  126. Retries int `json:"retries"`
  127. CreatedTime int64 `json:"createdTime"`
  128. CompletedTime int64 `json:"completedTime"`
  129. AppID string `json:"appId"`
  130. AppProgress string `json:"appProgress"`
  131. AppTrackingURL string `json:"appTrackingUrl"`
  132. AppLaunchedTime int64 `json:"appLaunchedTime"`
  133. AppCompletedTime interface{} `json:"appCompletedTime"`
  134. AppExitCode int `json:"appExitCode"`
  135. AppExitDiagnostics string `json:"appExitDiagnostics"`
  136. AppExitType interface{} `json:"appExitType"`
  137. VirtualCluster string `json:"virtualCluster"`
  138. } `json:"jobStatus"`
  139. TaskRoles map[string]interface{} `json:"taskRoles"`
  140. Resource struct {
  141. CPU int `json:"cpu"`
  142. Memory string `json:"memory"`
  143. NvidiaComGpu int `json:"nvidia.com/gpu"`
  144. } `json:"resource"`
  145. Config struct {
  146. Image string `json:"image"`
  147. JobID string `json:"jobId"`
  148. GpuType string `json:"gpuType"`
  149. JobName string `json:"jobName"`
  150. JobType string `json:"jobType"`
  151. TaskRoles []struct {
  152. Name string `json:"name"`
  153. ShmMB int `json:"shmMB"`
  154. Command string `json:"command"`
  155. MemoryMB int `json:"memoryMB"`
  156. CPUNumber int `json:"cpuNumber"`
  157. GpuNumber int `json:"gpuNumber"`
  158. IsMainRole bool `json:"isMainRole"`
  159. TaskNumber int `json:"taskNumber"`
  160. NeedIBDevice bool `json:"needIBDevice"`
  161. MinFailedTaskCount int `json:"minFailedTaskCount"`
  162. MinSucceededTaskCount int `json:"minSucceededTaskCount"`
  163. } `json:"taskRoles"`
  164. RetryCount int `json:"retryCount"`
  165. } `json:"config"`
  166. Userinfo struct {
  167. User string `json:"user"`
  168. OrgID string `json:"org_id"`
  169. } `json:"userinfo"`
  170. }
  171. func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
  172. data, _ := json.Marshal(input)
  173. var jobResultPayload JobResultPayload
  174. err := json.Unmarshal(data, &jobResultPayload)
  175. return jobResultPayload, err
  176. }
  177. type ImagesResultPayload struct {
  178. Images []struct {
  179. ID int `json:"id"`
  180. Name string `json:"name"`
  181. Place string `json:"place"`
  182. Description string `json:"description"`
  183. Provider string `json:"provider"`
  184. Createtime string `json:"createtime"`
  185. Remark string `json:"remark"`
  186. } `json:"taskStatuses"`
  187. }
  188. type ImageInfo struct {
  189. ID int `json:"id"`
  190. Name string `json:"name"`
  191. Place string `json:"place"`
  192. Description string `json:"description"`
  193. Provider string `json:"provider"`
  194. Createtime string `json:"createtime"`
  195. Remark string `json:"remark"`
  196. }
  197. func ConvertToImagesResultPayload(input map[string]ImageInfo) (ImagesResultPayload, error) {
  198. for _,info := range input {
  199. log.Info(info.Name)
  200. }
  201. var res ImagesResultPayload
  202. return res, nil
  203. }
  204. func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) {
  205. sess := x.NewSession()
  206. defer sess.Close()
  207. var cond = builder.NewCond()
  208. if opts.RepoID > 0 {
  209. cond = cond.And(
  210. builder.Eq{"cloudbrain.repo_id": opts.RepoID},
  211. )
  212. }
  213. if opts.UserID > 0 {
  214. cond = cond.And(
  215. builder.Eq{"cloudbrain.user_id": opts.UserID},
  216. )
  217. }
  218. if (opts.JobID) > 0 {
  219. cond = cond.And(
  220. builder.Eq{"cloudbrain.job_id": opts.JobID},
  221. )
  222. }
  223. // switch opts.JobStatus {
  224. // case JobWaiting:
  225. // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
  226. // case JobFailed:
  227. // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
  228. // case JobStopped:
  229. // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
  230. // case JobSucceeded:
  231. // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
  232. // }
  233. if len(opts.CloudbrainIDs) > 0 {
  234. cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
  235. }
  236. count, err := sess.Where(cond).Count(new(Cloudbrain))
  237. if err != nil {
  238. return nil, 0, fmt.Errorf("Count: %v", err)
  239. }
  240. if opts.Page >= 0 && opts.PageSize > 0 {
  241. var start int
  242. if opts.Page == 0 {
  243. start = 0
  244. } else {
  245. start = (opts.Page - 1) * opts.PageSize
  246. }
  247. sess.Limit(opts.PageSize, start)
  248. }
  249. sess.OrderBy("cloudbrain.created_unix DESC")
  250. cloudbrains := make([]*Cloudbrain, 0, setting.UI.IssuePagingNum)
  251. if err := sess.Where(cond).Find(&cloudbrains); err != nil {
  252. return nil, 0, fmt.Errorf("Find: %v", err)
  253. }
  254. sess.Close()
  255. return cloudbrains, count, nil
  256. }
  257. func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
  258. if _, err = x.Insert(cloudbrain); err != nil {
  259. return err
  260. }
  261. return nil
  262. }
  263. func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
  264. has, err := x.Get(cb)
  265. if err != nil {
  266. return nil, err
  267. } else if !has {
  268. return nil, errors.New("cloudbrain task is not found")
  269. }
  270. return cb, nil
  271. }
  272. func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
  273. cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
  274. return getRepoCloudBrain(cb)
  275. }
  276. func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
  277. cb := &Cloudbrain{JobID: jobID}
  278. return getRepoCloudBrain(cb)
  279. }
  280. func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
  281. cb := &Cloudbrain{JobID: jobID, Status: string(status)}
  282. _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
  283. return
  284. }