package cloudbrain import ( "code.gitea.io/gitea/modules/timeutil" "encoding/json" "errors" "strconv" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/setting" ) const ( Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` //CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` CodeMountPath = "/code" DataSetMountPath = "/dataset" ModelMountPath = "/model" LogFile = "log.txt" BenchMarkMountPath = "/benchmark" BenchMarkResourceID = 1 Snn4imagenetMountPath = "/snn4imagenet" BrainScoreMountPath = "/brainscore" TaskInfoName = "/taskInfo" Snn4imagenetCommand = Command + ";" + `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s'` BrainScoreCommand = Command + ";" + `/opt/conda/bin/python /brainscore/score_a_model.py --brainRegion '%s' --modelname '%s' --modelpath '/dataset' --modeldescription '%s'` + ";" SubTaskName = "task1" Success = "S000" DefaultBranchName = "master" ) var ( ResourceSpecs *models.ResourceSpecs TrainResourceSpecs *models.ResourceSpecs ) func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { if !ctx.IsSigned { return false } log.Info("is repo owner:" + strconv.FormatBool(ctx.IsUserRepoOwner())) log.Info("is user admin:" + strconv.FormatBool(ctx.IsUserSiteAdmin())) if err != nil { return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin() } else { log.Info("is job creator:" + strconv.FormatBool(ctx.User.ID == job.UserID)) return ctx.IsUserRepoOwner() || ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID } } func CanDeleteJob(ctx *context.Context, job *models.Cloudbrain) bool { return isAdminOrOwnerOrJobCreater(ctx, job, nil) } func CanCreateOrDebugJob(ctx *context.Context) bool { if !ctx.IsSigned { return false } return ctx.Repo.CanWrite(models.UnitTypeCloudBrain) } func CanModifyJob(ctx *context.Context, job *models.Cloudbrain) bool { return isAdminOrJobCreater(ctx, job, nil) } func isAdminOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { if !ctx.IsSigned { return false } if err != nil { return ctx.IsUserSiteAdmin() } else { return ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID } } func isAdminOrImageCreater(ctx *context.Context, image *models.Image, err error) bool { if !ctx.IsSigned { return false } if err != nil { return ctx.IsUserSiteAdmin() } else { return ctx.IsUserSiteAdmin() || ctx.User.ID == image.UID } } func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { var ID = ctx.Params(":id") job, err := models.GetCloudbrainByID(ID) if err != nil { log.Error("GetCloudbrainByID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } ctx.Cloudbrain = job if !isAdminOrOwnerOrJobCreater(ctx, job, err) { log.Error("!isAdminOrOwnerOrJobCreater error:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } } func AdminOrJobCreaterRight(ctx *context.Context) { var ID = ctx.Params(":id") job, err := models.GetCloudbrainByID(ID) if err != nil { log.Error("GetCloudbrainByID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } ctx.Cloudbrain = job if !isAdminOrJobCreater(ctx, job, err) { log.Error("!isAdminOrJobCreater error:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } } func AdminOrOwnerOrJobCreaterRightForTrain(ctx *context.Context) { var jobID = ctx.Params(":jobid") job, err := models.GetCloudbrainByJobID(jobID) if err != nil { log.Error("GetCloudbrainByJobID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } ctx.Cloudbrain = job if !isAdminOrOwnerOrJobCreater(ctx, job, err) { log.Error("!isAdminOrOwnerOrJobCreater failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } } func AdminOrJobCreaterRightForTrain(ctx *context.Context) { var jobID = ctx.Params(":jobid") job, err := models.GetCloudbrainByJobID(jobID) if err != nil { log.Error("GetCloudbrainByJobID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } ctx.Cloudbrain = job if !isAdminOrJobCreater(ctx, job, err) { log.Error("!isAdminOrJobCreater errot:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } } func AdminOrImageCreaterRight(ctx *context.Context) { id, err := strconv.ParseInt(ctx.Params(":id"), 10, 64) var image *models.Image if err != nil { log.Error("Get Image by ID failed:%v", err.Error()) } else { image, err = models.GetImageByID(id) if err != nil { log.Error("Get Image by ID failed:%v", err.Error()) return } } if !isAdminOrImageCreater(ctx, image, err) { log.Error("!isAdminOrImageCreater error:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) } } func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description, branchName, bootFile, params string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error { dataActualPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.BasePath + models.AttachmentRelativePath(uuid) + uuid var resourceSpec *models.ResourceSpec var versionCount int if jobType == string(models.JobTypeTrain) { versionCount = 1 if TrainResourceSpecs == nil { json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) } for _, spec := range TrainResourceSpecs.ResourceSpec { if resourceSpecId == spec.Id { resourceSpec = spec } } } else { if ResourceSpecs == nil { json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) } for _, spec := range ResourceSpecs.ResourceSpec { if resourceSpecId == spec.Id { resourceSpec = spec } } } if resourceSpec == nil { log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"]) return errors.New("no such resourceSpec") } var datasetName string attach, err := models.GetAttachmentByUUID(uuid) if err != nil { //for benchmark, do not return error log.Error("GetAttachmentByUUID failed:%v", err) } else { datasetName = attach.Name } createTime := timeutil.TimeStampNow() jobResult, err := CreateJob(jobName, models.CreateJobParams{ JobName: jobName, RetryCount: 1, GpuType: gpuQueue, Image: image, TaskRoles: []models.TaskRole{ { Name: SubTaskName, TaskNumber: 1, MinSucceededTaskCount: 1, MinFailedTaskCount: 1, CPUNumber: resourceSpec.CpuNum, GPUNumber: resourceSpec.GpuNum, MemoryMB: resourceSpec.MemMiB, ShmMB: resourceSpec.ShareMemMiB, Command: command, NeedIBDevice: false, IsMainRole: false, UseNNI: false, }, }, Volumes: []models.Volume{ { HostPath: models.StHostPath{ Path: codePath, MountPath: CodeMountPath, ReadOnly: false, }, }, { HostPath: models.StHostPath{ Path: dataActualPath, MountPath: DataSetMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ Path: modelPath, MountPath: ModelMountPath, ReadOnly: false, }, }, { HostPath: models.StHostPath{ Path: benchmarkPath, MountPath: BenchMarkMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ Path: snn4imagenetPath, MountPath: Snn4imagenetMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ Path: brainScorePath, MountPath: BrainScoreMountPath, ReadOnly: true, }, }, }, }) if err != nil { log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) return err } if jobResult.Code != Success { log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg, ctx.Data["MsgID"]) return errors.New(jobResult.Msg) } var jobID = jobResult.Payload["jobId"].(string) err = models.CreateCloudbrain(&models.Cloudbrain{ Status: string(models.JobWaiting), UserID: ctx.User.ID, RepoID: ctx.Repo.Repository.ID, JobID: jobID, JobName: jobName, DisplayJobName: displayJobName, SubTaskName: SubTaskName, JobType: jobType, Type: models.TypeCloudBrainOne, Uuid: uuid, Image: image, GpuQueue: gpuQueue, ResourceSpecId: resourceSpecId, ComputeResource: models.GPUResource, BenchmarkTypeID: benchmarkTypeID, BenchmarkChildTypeID: benchmarkChildTypeID, Description: description, IsLatestVersion: "1", VersionCount: versionCount, BranchName: branchName, BootFile: bootFile, DatasetName: datasetName, Parameters: params, CreatedUnix: createTime, UpdatedUnix: createTime, }) if err != nil { return err } task, err := models.GetCloudbrainByJobID(jobID) if err != nil { log.Error("GetCloudbrainByName failed: %v", err.Error()) return err } stringId := strconv.FormatInt(task.ID, 10) if string(models.JobTypeBenchmark) == jobType { notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateBenchMarkTask) } else if string(models.JobTypeTrain) == jobType { notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, displayJobName, models.ActionCreateGPUTrainTask) } else { notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugGPUTask) } return nil } func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) error { dataActualPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.BasePath + models.AttachmentRelativePath(task.Uuid) + task.Uuid jobName := task.JobName var resourceSpec *models.ResourceSpec if ResourceSpecs == nil { json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) } for _, spec := range ResourceSpecs.ResourceSpec { if task.ResourceSpecId == spec.Id { resourceSpec = spec } } if resourceSpec == nil { log.Error("no such resourceSpecId(%d)", task.ResourceSpecId, ctx.Data["MsgID"]) return errors.New("no such resourceSpec") } createTime := timeutil.TimeStampNow() jobResult, err := CreateJob(jobName, models.CreateJobParams{ JobName: jobName, RetryCount: 1, GpuType: task.GpuQueue, Image: task.Image, TaskRoles: []models.TaskRole{ { Name: SubTaskName, TaskNumber: 1, MinSucceededTaskCount: 1, MinFailedTaskCount: 1, CPUNumber: resourceSpec.CpuNum, GPUNumber: resourceSpec.GpuNum, MemoryMB: resourceSpec.MemMiB, ShmMB: resourceSpec.ShareMemMiB, Command: Command, NeedIBDevice: false, IsMainRole: false, UseNNI: false, }, }, Volumes: []models.Volume{ { HostPath: models.StHostPath{ Path: storage.GetMinioPath(jobName, CodeMountPath+"/"), MountPath: CodeMountPath, ReadOnly: false, }, }, { HostPath: models.StHostPath{ Path: dataActualPath, MountPath: DataSetMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ Path: storage.GetMinioPath(jobName, ModelMountPath+"/"), MountPath: ModelMountPath, ReadOnly: false, }, }, { HostPath: models.StHostPath{ Path: storage.GetMinioPath(jobName, BenchMarkMountPath+"/"), MountPath: BenchMarkMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ Path: storage.GetMinioPath(jobName, Snn4imagenetMountPath+"/"), MountPath: Snn4imagenetMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ Path: storage.GetMinioPath(jobName, BrainScoreMountPath+"/"), MountPath: BrainScoreMountPath, ReadOnly: true, }, }, }, }) if err != nil { log.Error("CreateJob failed:%v", err.Error(), ctx.Data["MsgID"]) return err } if jobResult.Code != Success { log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg, ctx.Data["MsgID"]) return errors.New(jobResult.Msg) } var jobID = jobResult.Payload["jobId"].(string) newTask := &models.Cloudbrain{ Status: string(models.JobWaiting), UserID: task.UserID, RepoID: task.RepoID, JobID: jobID, JobName: task.JobName, DisplayJobName: task.DisplayJobName, SubTaskName: task.SubTaskName, JobType: task.JobType, Type: task.Type, Uuid: task.Uuid, Image: task.Image, GpuQueue: task.GpuQueue, ResourceSpecId: task.ResourceSpecId, ComputeResource: task.ComputeResource, CreatedUnix: createTime, UpdatedUnix: createTime, BranchName: task.BranchName, } err = models.RestartCloudbrain(task, newTask) if err != nil { log.Error("RestartCloudbrain(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"]) return err } stringId := strconv.FormatInt(newTask.ID, 10) *newID = stringId notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, task.DisplayJobName, models.ActionCreateDebugGPUTask) return nil }