You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 5.1 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. package cloudbrain
  2. import (
  3. "errors"
  4. "code.gitea.io/gitea/modules/setting"
  5. "code.gitea.io/gitea/models"
  6. "code.gitea.io/gitea/modules/context"
  7. "code.gitea.io/gitea/modules/log"
  8. )
  // Command is the shell command line that installs jupyterlab 2.2.5 from the
  // Tsinghua PyPI mirror, stops the ssh service, and then starts JupyterLab on
  // port 80 with "/code" as the notebook directory and an empty token.
  const Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`

  // Mount points used for the volumes that GenerateTask attaches to a job.
  const (
  	CodeMountPath         = "/code"
  	DataSetMountPath      = "/dataset"
  	ModelMountPath        = "/model"
  	BenchMarkMountPath    = "/benchmark"
  	Snn4imagenetMountPath = "/snn4imagenet"
  	BrainScoreMountPath   = "/brainscore"
  )

  // Task naming and result codes.
  const (
  	// TaskInfoName is not referenced in this part of the file.
  	// NOTE(review): presumably a path or name for task info — confirm at its callers.
  	TaskInfoName = "/taskInfo"
  	// SubTaskName is the name given to the single task role of every job.
  	SubTaskName = "task1"
  	// Success is the result code GenerateTask expects from a successful CreateJob.
  	Success = "S000"
  )
  21. var (
  22. ResourceSpecs *models.ResourceSpecs
  23. )
  24. func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
  25. if err != nil {
  26. return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin()
  27. } else {
  28. return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID
  29. }
  30. }
  31. func CanDeleteDebugJob(ctx *context.Context, job *models.Cloudbrain) bool {
  32. return isAdminOrOwnerOrJobCreater(ctx, job, nil)
  33. }
  34. func CanDeleteTrainJob(ctx *context.Context, job *models.Cloudbrain) bool {
  35. return isAdminOrOwnerOrJobCreater(ctx, job, nil)
  36. }
  37. func CanCreateOrDebugJob(ctx *context.Context) bool {
  38. return ctx.Repo.CanWrite(models.UnitTypeCloudBrain)
  39. }
  40. func CanModifyJob(ctx *context.Context, job *models.Cloudbrain) bool {
  41. return isAdminOrJobCreater(ctx, job, nil)
  42. }
  43. func isAdminOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
  44. if err != nil {
  45. return ctx.IsUserSiteAdmin()
  46. } else {
  47. return ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID
  48. }
  49. }
  50. func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) {
  51. var jobID = ctx.Params(":jobid")
  52. job, err := models.GetCloudbrainByJobID(jobID)
  53. if !isAdminOrOwnerOrJobCreater(ctx, job, err) {
  54. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  55. }
  56. }
  57. func AdminOrJobCreaterRight(ctx *context.Context) {
  58. var jobID = ctx.Params(":jobid")
  59. job, err := models.GetCloudbrainByJobID(jobID)
  60. if !isAdminOrJobCreater(ctx, job, err) {
  61. ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
  62. }
  63. }
  64. func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue string, resourceSpecId int) error {
  65. dataActualPath := setting.Attachment.Minio.RealPath +
  66. setting.Attachment.Minio.Bucket + "/" +
  67. setting.Attachment.Minio.BasePath +
  68. models.AttachmentRelativePath(uuid) +
  69. uuid
  70. var resourceSpec *models.ResourceSpec
  71. for _, spec := range ResourceSpecs.ResourceSpec {
  72. if resourceSpecId == spec.Id {
  73. resourceSpec = spec
  74. }
  75. }
  76. if resourceSpec == nil {
  77. log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"])
  78. return errors.New("no such resourceSpec")
  79. }
  80. jobResult, err := CreateJob(jobName, models.CreateJobParams{
  81. JobName: jobName,
  82. RetryCount: 1,
  83. GpuType: gpuQueue,
  84. Image: image,
  85. TaskRoles: []models.TaskRole{
  86. {
  87. Name: SubTaskName,
  88. TaskNumber: 1,
  89. MinSucceededTaskCount: 1,
  90. MinFailedTaskCount: 1,
  91. CPUNumber: resourceSpec.CpuNum,
  92. GPUNumber: resourceSpec.GpuNum,
  93. MemoryMB: resourceSpec.MemMiB,
  94. ShmMB: resourceSpec.ShareMemMiB,
  95. Command: command,
  96. NeedIBDevice: false,
  97. IsMainRole: false,
  98. UseNNI: false,
  99. },
  100. },
  101. Volumes: []models.Volume{
  102. {
  103. HostPath: models.StHostPath{
  104. Path: codePath,
  105. MountPath: CodeMountPath,
  106. ReadOnly: false,
  107. },
  108. },
  109. {
  110. HostPath: models.StHostPath{
  111. Path: dataActualPath,
  112. MountPath: DataSetMountPath,
  113. ReadOnly: true,
  114. },
  115. },
  116. {
  117. HostPath: models.StHostPath{
  118. Path: modelPath,
  119. MountPath: ModelMountPath,
  120. ReadOnly: false,
  121. },
  122. },
  123. {
  124. HostPath: models.StHostPath{
  125. Path: benchmarkPath,
  126. MountPath: BenchMarkMountPath,
  127. ReadOnly: true,
  128. },
  129. },
  130. {
  131. HostPath: models.StHostPath{
  132. Path: snn4imagenetPath,
  133. MountPath: Snn4imagenetMountPath,
  134. ReadOnly: true,
  135. },
  136. },
  137. {
  138. HostPath: models.StHostPath{
  139. Path: brainScorePath,
  140. MountPath: BrainScoreMountPath,
  141. ReadOnly: true,
  142. },
  143. },
  144. },
  145. })
  146. if err != nil {
  147. log.Error("CreateJob failed:", err.Error())
  148. return err
  149. }
  150. if jobResult.Code != Success {
  151. log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg)
  152. return errors.New(jobResult.Msg)
  153. }
  154. var jobID = jobResult.Payload["jobId"].(string)
  155. err = models.CreateCloudbrain(&models.Cloudbrain{
  156. Status: string(models.JobWaiting),
  157. UserID: ctx.User.ID,
  158. RepoID: ctx.Repo.Repository.ID,
  159. JobID: jobID,
  160. JobName: jobName,
  161. SubTaskName: SubTaskName,
  162. JobType: jobType,
  163. Type: models.TypeCloudBrainOne,
  164. Uuid: uuid,
  165. })
  166. if err != nil {
  167. return err
  168. }
  169. return nil
  170. }