Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/2456 Reviewed-by: lewis <747342561@qq.com>pull/2457/head
@@ -570,11 +570,12 @@ type SpecialPools struct { | |||||
Pools []*SpecialPool `json:"pools"` | Pools []*SpecialPool `json:"pools"` | ||||
} | } | ||||
type SpecialPool struct { | type SpecialPool struct { | ||||
Org string `json:"org"` | |||||
Type string `json:"type"` | |||||
IsExclusive bool `json:"isExclusive"` | |||||
Pool []*GpuInfo `json:"pool"` | |||||
JobType []string `json:"jobType"` | |||||
Org string `json:"org"` | |||||
Type string `json:"type"` | |||||
IsExclusive bool `json:"isExclusive"` | |||||
Pool []*GpuInfo `json:"pool"` | |||||
JobType []string `json:"jobType"` | |||||
ResourceSpec []*ResourceSpec `json:"resourceSpecs"` | |||||
} | } | ||||
type ImageInfosModelArts struct { | type ImageInfosModelArts struct { | ||||
@@ -42,6 +42,7 @@ const ( | |||||
var ( | var ( | ||||
ResourceSpecs *models.ResourceSpecs | ResourceSpecs *models.ResourceSpecs | ||||
TrainResourceSpecs *models.ResourceSpecs | TrainResourceSpecs *models.ResourceSpecs | ||||
SpecialPools *models.SpecialPools | |||||
) | ) | ||||
type GenerateCloudBrainTaskReq struct { | type GenerateCloudBrainTaskReq struct { | ||||
@@ -222,6 +223,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||||
for _, spec := range TrainResourceSpecs.ResourceSpec { | for _, spec := range TrainResourceSpecs.ResourceSpec { | ||||
if req.ResourceSpecId == spec.Id { | if req.ResourceSpecId == spec.Id { | ||||
resourceSpec = spec | resourceSpec = spec | ||||
break | |||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
@@ -231,10 +233,29 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||||
for _, spec := range ResourceSpecs.ResourceSpec { | for _, spec := range ResourceSpecs.ResourceSpec { | ||||
if req.ResourceSpecId == spec.Id { | if req.ResourceSpecId == spec.Id { | ||||
resourceSpec = spec | resourceSpec = spec | ||||
break | |||||
} | } | ||||
} | } | ||||
} | } | ||||
//如果没有匹配到spec信息,尝试从专属资源池获取 | |||||
if resourceSpec == nil && SpecialPools != nil { | |||||
for _, specialPool := range SpecialPools.Pools { | |||||
if resourceSpec != nil { | |||||
break | |||||
} | |||||
if specialPool.ResourceSpec != nil { | |||||
if IsElementExist(specialPool.JobType, req.JobType) && IsQueueInSpecialtPool(specialPool.Pool, req.GpuQueue) { | |||||
for _, spec := range specialPool.ResourceSpec { | |||||
if req.ResourceSpecId == spec.Id { | |||||
resourceSpec = spec | |||||
break | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
if resourceSpec == nil { | if resourceSpec == nil { | ||||
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) | log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) | ||||
@@ -538,3 +559,39 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e | |||||
return nil | return nil | ||||
} | } | ||||
func InitSpecialPool() { | |||||
if SpecialPools == nil && setting.SpecialPools != "" { | |||||
json.Unmarshal([]byte(setting.SpecialPools), &SpecialPools) | |||||
} | |||||
} | |||||
func IsResourceSpecInSpecialPool(resourceSpecs []*models.ResourceSpec, resourceSpecId int) bool { | |||||
if resourceSpecs == nil || len(resourceSpecs) == 0 { | |||||
return true | |||||
} | |||||
for _, v := range resourceSpecs { | |||||
if v.Id == resourceSpecId { | |||||
return true | |||||
} | |||||
} | |||||
return false | |||||
} | |||||
func IsQueueInSpecialtPool(pool []*models.GpuInfo, queue string) bool { | |||||
for _, v := range pool { | |||||
if v.Queue == queue { | |||||
return true | |||||
} | |||||
} | |||||
return false | |||||
} | |||||
// IsElementExist reports whether str occurs in s, using exact string
// equality. An empty or nil s yields false.
func IsElementExist(s []string, str string) bool {
	for i := range s {
		if s[i] == str {
			return true
		}
	}
	return false
}
@@ -460,6 +460,7 @@ var ( | |||||
CBCodePathPrefix string | CBCodePathPrefix string | ||||
JobType string | JobType string | ||||
GpuTypes string | GpuTypes string | ||||
SpecialPools string | |||||
DebugServerHost string | DebugServerHost string | ||||
ResourceSpecs string | ResourceSpecs string | ||||
MaxDuration int64 | MaxDuration int64 | ||||
@@ -1311,6 +1312,8 @@ func NewContext() { | |||||
MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) | MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) | ||||
TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("") | TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("") | ||||
TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("") | TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("") | ||||
SpecialPools = sec.Key("SPECIAL_POOL").MustString("") | |||||
MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5) | MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5) | ||||
sec = Cfg.Section("benchmark") | sec = Cfg.Section("benchmark") | ||||
@@ -150,6 +150,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { | |||||
ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType | ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType | ||||
cloudbrain.InitSpecialPool() | |||||
if gpuInfos == nil { | if gpuInfos == nil { | ||||
json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) | json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) | ||||
} | } | ||||
@@ -179,6 +181,45 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { | |||||
json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) | json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) | ||||
} | } | ||||
ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec | ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec | ||||
if cloudbrain.SpecialPools != nil { | |||||
var debugGpuTypes []*models.GpuInfo | |||||
var trainGpuTypes []*models.GpuInfo | |||||
for _, pool := range cloudbrain.SpecialPools.Pools { | |||||
org, _ := models.GetOrgByName(pool.Org) | |||||
if org != nil { | |||||
isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) | |||||
if isOrgMember { | |||||
for _, jobType := range pool.JobType { | |||||
if jobType == string(models.JobTypeDebug) { | |||||
debugGpuTypes = append(debugGpuTypes, pool.Pool...) | |||||
if pool.ResourceSpec != nil { | |||||
ctx.Data["resource_specs"] = pool.ResourceSpec | |||||
} | |||||
} else if jobType == string(models.JobTypeTrain) { | |||||
trainGpuTypes = append(trainGpuTypes, pool.Pool...) | |||||
if pool.ResourceSpec != nil { | |||||
ctx.Data["train_resource_specs"] = pool.ResourceSpec | |||||
} | |||||
} | |||||
} | |||||
break | |||||
} | |||||
} | |||||
} | |||||
if len(debugGpuTypes) > 0 { | |||||
ctx.Data["gpu_types"] = debugGpuTypes | |||||
} | |||||
if len(trainGpuTypes) > 0 { | |||||
ctx.Data["train_gpu_types"] = trainGpuTypes | |||||
} | |||||
} | |||||
ctx.Data["params"] = "" | ctx.Data["params"] = "" | ||||
ctx.Data["branchName"] = ctx.Repo.BranchName | ctx.Data["branchName"] = ctx.Repo.BranchName | ||||
@@ -218,6 +259,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
repo := ctx.Repo.Repository | repo := ctx.Repo.Repository | ||||
tpl := tplCloudBrainNew | tpl := tplCloudBrainNew | ||||
if jobType == string(models.JobTypeTrain) { | |||||
tpl = tplCloudBrainTrainJobNew | |||||
} | |||||
tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) | tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) | ||||
if err == nil { | if err == nil { | ||||
if len(tasks) != 0 { | if len(tasks) != 0 { | ||||
@@ -283,6 +328,14 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
command = commandTrain | command = commandTrain | ||||
} | } | ||||
errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId) | |||||
if errStr != "" { | |||||
cloudBrainNewDataPrepare(ctx) | |||||
ctx.RenderWithErr(errStr, tpl, &form) | |||||
return | |||||
} | |||||
if branchName == "" { | if branchName == "" { | ||||
branchName = cloudbrain.DefaultBranchName | branchName = cloudbrain.DefaultBranchName | ||||
} | } | ||||
@@ -335,6 +388,42 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
} | } | ||||
} | } | ||||
/** | |||||
检查用户传输的参数是否符合专属资源池 | |||||
*/ | |||||
func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string { | |||||
if cloudbrain.SpecialPools != nil { | |||||
var isInPoolOrg = false | |||||
var matchSpecialPool = false | |||||
for _, specialPool := range cloudbrain.SpecialPools.Pools { | |||||
if cloudbrain.IsElementExist(specialPool.JobType, jobType) && cloudbrain.IsQueueInSpecialtPool(specialPool.Pool, queue) { | |||||
if cloudbrain.IsResourceSpecInSpecialPool(specialPool.ResourceSpec, resourceSpecId) { | |||||
matchSpecialPool = true | |||||
org, _ := models.GetOrgByName(specialPool.Org) | |||||
if org != nil { | |||||
isInPoolOrg, _ = models.IsOrganizationMember(org.ID, ctx.User.ID) | |||||
if isInPoolOrg { | |||||
break //传入参数,和专属资源池匹配上了,检查通过 | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
//资源池有匹配上,但是用户不在相应的组织中,返回错误信息。界面已经过滤了选择,界面操作不会到这个逻辑 | |||||
if matchSpecialPool && !isInPoolOrg { | |||||
return ctx.Tr("repo.grampus.no_operate_right") | |||||
} | |||||
} | |||||
//没有匹配到资源池或者没有设置专属资源池,检查通过; 获取和资源池完全匹配检查通过 | |||||
return "" | |||||
} | |||||
func CloudBrainRestart(ctx *context.Context) { | func CloudBrainRestart(ctx *context.Context) { | ||||
var ID = ctx.Params(":id") | var ID = ctx.Params(":id") | ||||
var resultCode = "0" | var resultCode = "0" | ||||