diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 8e1b94a97..ea1826de0 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -570,11 +570,12 @@ type SpecialPools struct { Pools []*SpecialPool `json:"pools"` } type SpecialPool struct { - Org string `json:"org"` - Type string `json:"type"` - IsExclusive bool `json:"isExclusive"` - Pool []*GpuInfo `json:"pool"` - JobType []string `json:"jobType"` + Org string `json:"org"` + Type string `json:"type"` + IsExclusive bool `json:"isExclusive"` + Pool []*GpuInfo `json:"pool"` + JobType []string `json:"jobType"` + ResourceSpec []*ResourceSpec `json:"resourceSpecs"` } type ImageInfosModelArts struct { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 430304dd5..6c0681aba 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -42,6 +42,7 @@ const ( var ( ResourceSpecs *models.ResourceSpecs TrainResourceSpecs *models.ResourceSpecs + SpecialPools *models.SpecialPools ) type GenerateCloudBrainTaskReq struct { @@ -222,6 +223,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { for _, spec := range TrainResourceSpecs.ResourceSpec { if req.ResourceSpecId == spec.Id { resourceSpec = spec + break } } } else { @@ -231,10 +233,29 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { for _, spec := range ResourceSpecs.ResourceSpec { if req.ResourceSpecId == spec.Id { resourceSpec = spec + break } } } + //如果没有匹配到spec信息,尝试从专属资源池获取 + if resourceSpec == nil && SpecialPools != nil { + for _, specialPool := range SpecialPools.Pools { + if resourceSpec != nil { + break + } + if specialPool.ResourceSpec != nil { + if IsElementExist(specialPool.JobType, req.JobType) && IsQueueInSpecialtPool(specialPool.Pool, req.GpuQueue) { + for _, spec := range specialPool.ResourceSpec { + if req.ResourceSpecId == spec.Id { + resourceSpec = spec + break + } + } + } + } + } + } if resourceSpec == nil { log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) @@ -538,3 +559,39 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e return nil } + +func InitSpecialPool() { + if SpecialPools == nil && setting.SpecialPools != "" { + json.Unmarshal([]byte(setting.SpecialPools), &SpecialPools) + } +} + +func IsResourceSpecInSpecialPool(resourceSpecs []*models.ResourceSpec, resourceSpecId int) bool { + if resourceSpecs == nil || len(resourceSpecs) == 0 { + return true + } + for _, v := range resourceSpecs { + if v.Id == resourceSpecId { + return true + } + } + return false +} + +func IsQueueInSpecialtPool(pool []*models.GpuInfo, queue string) bool { + for _, v := range pool { + if v.Queue == queue { + return true + } + } + return false +} + +func IsElementExist(s []string, str string) bool { + for _, v := range s { + if v == str { + return true + } + } + return false +} diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 506e31ba3..d206ed9a3 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -460,6 +460,7 @@ var ( CBCodePathPrefix string JobType string GpuTypes string + SpecialPools string DebugServerHost string ResourceSpecs string MaxDuration int64 @@ -1311,6 +1312,8 @@ func NewContext() { MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("") TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("") + SpecialPools = sec.Key("SPECIAL_POOL").MustString("") + MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5) sec = Cfg.Section("benchmark") diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index ee6ec77e4..cf864001a 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -150,6 +150,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType + cloudbrain.InitSpecialPool() + if gpuInfos == nil { json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) } @@ -179,6 +181,45 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) } ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec + + if cloudbrain.SpecialPools != nil { + var debugGpuTypes []*models.GpuInfo + var trainGpuTypes []*models.GpuInfo + + for _, pool := range cloudbrain.SpecialPools.Pools { + org, _ := models.GetOrgByName(pool.Org) + if org != nil { + isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) + if isOrgMember { + for _, jobType := range pool.JobType { + if jobType == string(models.JobTypeDebug) { + debugGpuTypes = append(debugGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["resource_specs"] = pool.ResourceSpec + } + } else if jobType == string(models.JobTypeTrain) { + trainGpuTypes = append(trainGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["train_resource_specs"] = pool.ResourceSpec + } + } + } + break + } + } + + } + + if len(debugGpuTypes) > 0 { + ctx.Data["gpu_types"] = debugGpuTypes + } + + if len(trainGpuTypes) > 0 { + ctx.Data["train_gpu_types"] = trainGpuTypes + } + + } + ctx.Data["params"] = "" ctx.Data["branchName"] = ctx.Repo.BranchName @@ -218,6 +259,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { repo := ctx.Repo.Repository tpl := tplCloudBrainNew + if jobType == string(models.JobTypeTrain) { + tpl = tplCloudBrainTrainJobNew + } + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) if err == nil { if len(tasks) != 0 { @@ -283,6 +328,14 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { command = commandTrain } + errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId) + + if errStr != "" { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr(errStr, tpl, &form) + return + } + if branchName == "" { branchName = cloudbrain.DefaultBranchName } @@ -335,6 +388,42 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } } +/** + 检查用户传输的参数是否符合专属资源池 +*/ +func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string { + if cloudbrain.SpecialPools != nil { + + var isInPoolOrg = false + var matchSpecialPool = false + + for _, specialPool := range cloudbrain.SpecialPools.Pools { + + if cloudbrain.IsElementExist(specialPool.JobType, jobType) && cloudbrain.IsQueueInSpecialtPool(specialPool.Pool, queue) { + if cloudbrain.IsResourceSpecInSpecialPool(specialPool.ResourceSpec, resourceSpecId) { + matchSpecialPool = true + org, _ := models.GetOrgByName(specialPool.Org) + if org != nil { + isInPoolOrg, _ = models.IsOrganizationMember(org.ID, ctx.User.ID) + if isInPoolOrg { + break //传入参数,和专属资源池匹配上了,检查通过 + } + } + } + + } + + } + //资源池有匹配上,但是用户不在相应的组织中,返回错误信息。界面已经过滤了选择,界面操作不会到这个逻辑 + if matchSpecialPool && !isInPoolOrg { + return ctx.Tr("repo.grampus.no_operate_right") + } + + } + //没有匹配到资源池或者没有设置专属资源池,检查通过; 获取和资源池完全匹配检查通过 + return "" +} + func CloudBrainRestart(ctx *context.Context) { var ID = ctx.Params(":id") var resultCode = "0"