diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go
index 8fb13ca4c..bf1761955 100755
--- a/modules/cloudbrain/cloudbrain.go
+++ b/modules/cloudbrain/cloudbrain.go
@@ -44,6 +44,31 @@ var (
 	TrainResourceSpecs *models.ResourceSpecs
 )
 
+type GenerateCloudBrainTaskReq struct { // argument bundle consumed by GenerateTask
+	Ctx                  *context.Context // request context; GenerateTask reads Ctx.User, Ctx.Repo.Repository and Ctx.Data["MsgID"]
+	DisplayJobName       string           // stored on the Cloudbrain record and passed to the task notification
+	JobName              string           // name submitted to CreateJob and stored on the record
+	Image                string           // image passed to CreateJob and stored on the record
+	Command              string           // command set on the job's task role
+	Uuids                string           // dataset attachment UUID; NOTE(review): GenerateTask resolves it as a single UUID although the name is plural — confirm how multiple values are handled
+	CodePath             string           // host path mounted at CodeMountPath
+	ModelPath            string           // host path mounted at ModelMountPath
+	BenchmarkPath        string           // host path mounted read-only at BenchMarkMountPath
+	Snn4ImageNetPath     string           // host path mounted read-only at Snn4imagenetMountPath
+	BrainScorePath       string           // host path mounted read-only at BrainScoreMountPath
+	JobType              string           // compared with models.JobTypeTrain to pick the resource-spec table; also selects the notification action
+	GpuQueue             string           // sent to CreateJob as GpuType and stored on the record
+	Description          string           // stored on the record
+	BranchName           string           // stored on the record
+	BootFile             string           // stored on the record
+	Params               string           // stored on the record as Parameters
+	CommitID             string           // stored on the record
+	DataLocalPath        string           // NOTE(review): not read by the GenerateTask code visible in this patch — confirm where it is consumed
+	BenchmarkTypeID      int              // stored on the record
+	BenchmarkChildTypeID int              // stored on the record
+	ResourceSpecId       int              // matched against spec.Id to select the resource spec
+}
+
 func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
 	if !ctx.IsSigned {
 		return false
@@ -187,23 +212,23 @@ func AdminOrImageCreaterRight(ctx *context.Context) { } -func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description, branchName, bootFile, params, commitID string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error { +func GenerateTask(req GenerateCloudBrainTaskReq) error { dataActualPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.BasePath + - models.AttachmentRelativePath(uuid) + - uuid + models.AttachmentRelativePath(req.Uuids) + + req.Uuids var resourceSpec *models.ResourceSpec var versionCount int - if jobType == string(models.JobTypeTrain) { + if req.JobType == string(models.JobTypeTrain) { versionCount = 1 if TrainResourceSpecs == nil { json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) } for _, spec := range TrainResourceSpecs.ResourceSpec { - if resourceSpecId == spec.Id { + if req.ResourceSpecId == spec.Id { resourceSpec = spec } } @@ -212,7 +237,7 @@
func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) } for _, spec := range ResourceSpecs.ResourceSpec { - if resourceSpecId == spec.Id { + if req.ResourceSpecId == spec.Id { resourceSpec = spec } } @@ -220,25 +245,25 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, } if resourceSpec == nil { - log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"]) + log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) return errors.New("no such resourceSpec") } var datasetName string - attach, err := models.GetAttachmentByUUID(uuid) + attach, err := models.GetAttachmentByUUID(req.Uuids) if err != nil { //for benchmark, do not return error - log.Error("GetAttachmentByUUID failed:%v", err) + log.Error("GetAttachmentByUUID failed:%v", err, req.Ctx.Data["MsgID"]) } else { datasetName = attach.Name } createTime := timeutil.TimeStampNow() - jobResult, err := CreateJob(jobName, models.CreateJobParams{ - JobName: jobName, + jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ + JobName: req.JobName, RetryCount: 1, - GpuType: gpuQueue, - Image: image, + GpuType: req.GpuQueue, + Image: req.Image, TaskRoles: []models.TaskRole{ { Name: SubTaskName, @@ -249,7 +274,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, GPUNumber: resourceSpec.GpuNum, MemoryMB: resourceSpec.MemMiB, ShmMB: resourceSpec.ShareMemMiB, - Command: command, + Command: req.Command, NeedIBDevice: false, IsMainRole: false, UseNNI: false, @@ -258,7 +283,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, Volumes: []models.Volume{ { HostPath: models.StHostPath{ - Path: codePath, + Path: req.CodePath, MountPath: CodeMountPath, ReadOnly: false, }, @@ -272,28 +297,28 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, }, { HostPath: 
models.StHostPath{ - Path: modelPath, + Path: req.ModelPath, MountPath: ModelMountPath, ReadOnly: false, }, }, { HostPath: models.StHostPath{ - Path: benchmarkPath, + Path: req.BenchmarkPath, MountPath: BenchMarkMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ - Path: snn4imagenetPath, + Path: req.Snn4ImageNetPath, MountPath: Snn4imagenetMountPath, ReadOnly: true, }, }, { HostPath: models.StHostPath{ - Path: brainScorePath, + Path: req.BrainScorePath, MountPath: BrainScoreMountPath, ReadOnly: true, }, @@ -301,42 +326,42 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, }, }) if err != nil { - log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) + log.Error("CreateJob failed:", err.Error(), req.Ctx.Data["MsgID"]) return err } if jobResult.Code != Success { - log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg, ctx.Data["MsgID"]) + log.Error("CreateJob(%s) failed:%s", req.JobName, jobResult.Msg, req.Ctx.Data["MsgID"]) return errors.New(jobResult.Msg) } var jobID = jobResult.Payload["jobId"].(string) err = models.CreateCloudbrain(&models.Cloudbrain{ Status: string(models.JobWaiting), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, + UserID: req.Ctx.User.ID, + RepoID: req.Ctx.Repo.Repository.ID, JobID: jobID, - JobName: jobName, - DisplayJobName: displayJobName, + JobName: req.JobName, + DisplayJobName: req.DisplayJobName, SubTaskName: SubTaskName, - JobType: jobType, + JobType: req.JobType, Type: models.TypeCloudBrainOne, - Uuid: uuid, - Image: image, - GpuQueue: gpuQueue, - ResourceSpecId: resourceSpecId, + Uuid: req.Uuids, + Image: req.Image, + GpuQueue: req.GpuQueue, + ResourceSpecId: req.ResourceSpecId, ComputeResource: models.GPUResource, - BenchmarkTypeID: benchmarkTypeID, - BenchmarkChildTypeID: benchmarkChildTypeID, - Description: description, + BenchmarkTypeID: req.BenchmarkTypeID, + BenchmarkChildTypeID: req.BenchmarkChildTypeID, + Description: req.Description, IsLatestVersion: "1", 
VersionCount: versionCount, - BranchName: branchName, - BootFile: bootFile, + BranchName: req.BranchName, + BootFile: req.BootFile, DatasetName: datasetName, - Parameters: params, + Parameters: req.Params, CreatedUnix: createTime, UpdatedUnix: createTime, - CommitID: commitID, + CommitID: req.CommitID, }) if err != nil { @@ -345,17 +370,17 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, task, err := models.GetCloudbrainByJobID(jobID) if err != nil { - log.Error("GetCloudbrainByName failed: %v", err.Error()) + log.Error("GetCloudbrainByJobID failed: %v", err.Error()) return err } stringId := strconv.FormatInt(task.ID, 10) - if IsBenchmarkJob(jobType) { - notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateBenchMarkTask) - } else if string(models.JobTypeTrain) == jobType { - notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, displayJobName, models.ActionCreateGPUTrainTask) + if IsBenchmarkJob(req.JobType) { + notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateBenchMarkTask) + } else if string(models.JobTypeTrain) == req.JobType { + notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateGPUTrainTask) } else { - notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugGPUTask) + notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugGPUTask) } return nil diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index eb2ddc93b..f3f2fddb0 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -207,7 +207,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { displayJobName := form.DisplayJobName jobName := util.ConvertDisplayJobNameToJobName(displayJobName) image := 
strings.TrimSpace(form.Image) - uuid := form.Attachment + uuids := form.Attachment jobType := form.JobType gpuQueue := form.GpuType codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath @@ -273,6 +273,13 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } } + if err = checkDatasetLimit(uuids); err != nil { + log.Error("checkDatasetLimit failed: %v", err, ctx.Data["MsgID"]) + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("checkDatasetLimit failed", tpl, &form) + return + } + if branchName == "" { branchName = cloudbrain.DefaultBranchName } @@ -285,11 +292,31 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) - err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params, - commitID, 0, 0, resourceSpecId) + req := cloudbrain.GenerateCloudBrainTaskReq{ + Ctx: ctx, + DisplayJobName: displayJobName, + JobName: jobName, + Image: image, + Command: command, + Uuids: uuids, + CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), + ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), + BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), + Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), + BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), + JobType: jobType, + GpuQueue: gpuQueue, + Description: form.Description, + BranchName: branchName, + BootFile: form.BootFile, + Params: form.Params, 
+ CommitID: commitID, + BenchmarkTypeID: 0, + BenchmarkChildTypeID: 0, + ResourceSpecId: resourceSpecId, + } + + err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx) ctx.RenderWithErr(err.Error(), tpl, &form) @@ -1982,11 +2009,38 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo //return } - err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, cloudbrain.DefaultBranchName, "", "", - "", benchmarkTypeID, benchmarkChildTypeID, resourceSpecId) + dataActualPath := setting.Attachment.Minio.RealPath + + setting.Attachment.Minio.Bucket + "/" + + setting.Attachment.Minio.BasePath + + models.AttachmentRelativePath(childInfo.Attachment) + + childInfo.Attachment + + req := cloudbrain.GenerateCloudBrainTaskReq{ + Ctx: ctx, + DisplayJobName: displayJobName, + JobName: jobName, + Image: image, + Command: command, + Uuids: childInfo.Attachment, + CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), + ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), + BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), + Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), + BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), + JobType: string(models.JobTypeBenchmark), + GpuQueue: gpuQueue, + Description: form.Description, + BranchName: cloudbrain.DefaultBranchName, + BootFile: "", + Params: "", + CommitID: "", + BenchmarkTypeID: benchmarkTypeID, + BenchmarkChildTypeID: 
benchmarkChildTypeID, + ResourceSpecId: resourceSpecId, + DataLocalPath: dataActualPath, + } + + err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx) ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, &form) @@ -2080,11 +2134,38 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) command = fmt.Sprintf(cloudbrain.BrainScoreCommand, getBrainRegion(benchmarkChildTypeID), displayJobName, trimSpaceNewlineInString(form.Description)) } - err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params, - "", 0, benchmarkChildTypeID, resourceSpecId) + dataActualPath := setting.Attachment.Minio.RealPath + + setting.Attachment.Minio.Bucket + "/" + + setting.Attachment.Minio.BasePath + + models.AttachmentRelativePath(uuid) + + uuid + + req := cloudbrain.GenerateCloudBrainTaskReq{ + Ctx: ctx, + DisplayJobName: displayJobName, + JobName: jobName, + Image: image, + Command: command, + Uuids: uuid, + CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), + ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), + BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), + Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), + BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), + JobType: jobType, + GpuQueue: gpuQueue, + Description: form.Description, + BranchName: branchName, + BootFile: form.BootFile, + Params: form.Params, + CommitID: "", + BenchmarkTypeID: 0, + 
BenchmarkChildTypeID: benchmarkChildTypeID,
+		ResourceSpecId:       resourceSpecId,
+		DataLocalPath:        dataActualPath,
+	}
+
+	err = cloudbrain.GenerateTask(req)
 	if err != nil {
 		cloudBrainNewDataPrepare(ctx)
 		ctx.RenderWithErr(err.Error(), tpl, &form)
@@ -2195,3 +2276,41 @@ func GetBenchmarkTypes(ctx *context.Context) *models.BenchmarkTypes {
 	}
 	return benchmarkTypesMap[lang]
 }
+
+// checkDatasetLimit validates the ";"-separated attachment UUIDs chosen for a
+// task. Blank segments are skipped, so an empty selection or a trailing ";"
+// is accepted instead of being looked up as the UUID "". It returns an error
+// when more than maxDatasetNum datasets remain, when a UUID cannot be
+// resolved to an attachment, or when two attachments share a name.
+func checkDatasetLimit(uuidStr string) error {
+	// maxDatasetNum is the most datasets a single task may select.
+	const maxDatasetNum = 5
+
+	var uuids []string
+	for _, uuid := range strings.Split(uuidStr, ";") {
+		if uuid = strings.TrimSpace(uuid); uuid != "" {
+			uuids = append(uuids, uuid)
+		}
+	}
+	if len(uuids) > maxDatasetNum {
+		log.Error("the dataset count(%d) exceed the limit", len(uuids))
+		return errors.New("the dataset count exceed the limit")
+	}
+
+	// Only membership matters, so the set carries empty values.
+	seenNames := make(map[string]struct{}, len(uuids))
+	for _, uuid := range uuids {
+		attach, err := models.GetAttachmentByUUID(uuid)
+		if err != nil {
+			log.Error("GetAttachmentByUUID(%s) failed: %v", uuid, err)
+			return err
+		}
+
+		if _, ok := seenNames[attach.Name]; ok {
+			log.Error("the dataset name is same: %v", attach.Name)
+			return errors.New("the dataset name is same")
+		}
+		seenNames[attach.Name] = struct{}{}
+	}
+	return nil
+}