Browse Source

Merge pull request '云脑一支持专属资源池' (#2456) from grampus-specialpool2 into V20220718

Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/2456
Reviewed-by: lewis <747342561@qq.com>
pull/2457/head
lewis 2 years ago
parent
commit
a690299adf
4 changed files with 155 additions and 5 deletions
  1. +6
    -5
      models/cloudbrain.go
  2. +57
    -0
      modules/cloudbrain/cloudbrain.go
  3. +3
    -0
      modules/setting/setting.go
  4. +89
    -0
      routers/repo/cloudbrain.go

+ 6
- 5
models/cloudbrain.go View File

@@ -570,11 +570,12 @@ type SpecialPools struct {
Pools []*SpecialPool `json:"pools"`
}
// SpecialPool describes one dedicated resource pool entry of the special-pool
// configuration, decoded from JSON.
//
// NOTE(review): this is a diff view with the +/- markers stripped. The hunk
// header reports 5 removed and 6 added lines, so the first five fields below
// appear to be the removed side and the following six the added side.
type SpecialPool struct {
// Removed side of the hunk (presumably differs from the added side only in alignment).
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
// Org is an organization name; callers resolve it with GetOrgByName and
// check whether the current user is a member.
Org string `json:"org"`
// Type is not read anywhere in the visible code — TODO confirm its meaning.
Type string `json:"type"`
// IsExclusive is not read anywhere in the visible code — TODO confirm its meaning.
IsExclusive bool `json:"isExclusive"`
// Pool lists the GPU entries of this pool; they are matched by their Queue field.
Pool []*GpuInfo `json:"pool"`
// JobType lists the job type strings this pool applies to.
JobType []string `json:"jobType"`
// ResourceSpec optionally lists pool-specific resource specs (JSON key
// "resourceSpecs"); callers check it for nil before using it.
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
}

type ImageInfosModelArts struct {


+ 57
- 0
modules/cloudbrain/cloudbrain.go View File

@@ -42,6 +42,7 @@ const (
// Package-level caches of configuration decoded from JSON settings.
var (
// ResourceSpecs is searched by GenerateTask to resolve a request's ResourceSpecId.
ResourceSpecs *models.ResourceSpecs
// TrainResourceSpecs is the counterpart list searched by GenerateTask in its
// other branch; it is unmarshalled from setting.TrainResourceSpecs.
TrainResourceSpecs *models.ResourceSpecs
// SpecialPools holds the dedicated resource pools; it stays nil until
// InitSpecialPool decodes a non-empty setting.SpecialPools.
SpecialPools *models.SpecialPools
)

type GenerateCloudBrainTaskReq struct {
@@ -222,6 +223,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
for _, spec := range TrainResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}
} else {
@@ -231,10 +233,29 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
for _, spec := range ResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}

}
//如果没有匹配到spec信息,尝试从专属资源池获取
if resourceSpec == nil && SpecialPools != nil {
for _, specialPool := range SpecialPools.Pools {
if resourceSpec != nil {
break
}
if specialPool.ResourceSpec != nil {
if IsElementExist(specialPool.JobType, req.JobType) && IsQueueInSpecialtPool(specialPool.Pool, req.GpuQueue) {
for _, spec := range specialPool.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}
}
}
}
}

if resourceSpec == nil {
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"])
@@ -538,3 +559,39 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e

return nil
}

// InitSpecialPool lazily decodes the setting.SpecialPools JSON string into the
// package-level SpecialPools variable. It is a no-op when SpecialPools is
// already set or when the setting is empty.
//
// A decoding failure is logged and SpecialPools is reset to nil, so a malformed
// setting never leaves a partially decoded configuration installed; the next
// call will attempt to decode again.
//
// NOTE(review): SpecialPools is read and written here without synchronization
// — confirm whether concurrent callers are possible.
func InitSpecialPool() {
	if SpecialPools != nil || setting.SpecialPools == "" {
		return
	}
	if err := json.Unmarshal([]byte(setting.SpecialPools), &SpecialPools); err != nil {
		// Unmarshal may have filled in part of the value before failing;
		// discard it rather than act on a half-read configuration.
		SpecialPools = nil
		log.Error("unmarshal special pools setting failed: %v", err)
	}
}

// IsResourceSpecInSpecialPool reports whether resourceSpecId is accepted by a
// pool whose spec list is resourceSpecs. A nil or empty list places no
// restriction and always yields true; otherwise the id must equal the Id of
// one of the listed specs.
func IsResourceSpecInSpecialPool(resourceSpecs []*models.ResourceSpec, resourceSpecId int) bool {
	// len of a nil slice is 0, so this covers both nil and empty.
	if len(resourceSpecs) == 0 {
		return true
	}
	for i := range resourceSpecs {
		if resourceSpecs[i].Id == resourceSpecId {
			return true
		}
	}
	return false
}

// IsQueueInSpecialtPool reports whether any entry of pool has a Queue field
// equal to queue. A nil or empty pool yields false.
func IsQueueInSpecialtPool(pool []*models.GpuInfo, queue string) bool {
	for i := 0; i < len(pool); i++ {
		if pool[i].Queue == queue {
			return true
		}
	}
	return false
}

// IsElementExist reports whether str is equal to at least one element of s.
// A nil or empty slice yields false.
func IsElementExist(s []string, str string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == str {
			return true
		}
	}
	return false
}

+ 3
- 0
modules/setting/setting.go View File

@@ -460,6 +460,7 @@ var (
CBCodePathPrefix string
JobType string
GpuTypes string
SpecialPools string
DebugServerHost string
ResourceSpecs string
MaxDuration int64
@@ -1311,6 +1312,8 @@ func NewContext() {
MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400)
TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("")
TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("")
SpecialPools = sec.Key("SPECIAL_POOL").MustString("")
MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5)

sec = Cfg.Section("benchmark")


+ 89
- 0
routers/repo/cloudbrain.go View File

@@ -150,6 +150,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {

ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType

cloudbrain.InitSpecialPool()

if gpuInfos == nil {
json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos)
}
@@ -179,6 +181,45 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
}
ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec

if cloudbrain.SpecialPools != nil {
var debugGpuTypes []*models.GpuInfo
var trainGpuTypes []*models.GpuInfo

for _, pool := range cloudbrain.SpecialPools.Pools {
org, _ := models.GetOrgByName(pool.Org)
if org != nil {
isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID)
if isOrgMember {
for _, jobType := range pool.JobType {
if jobType == string(models.JobTypeDebug) {
debugGpuTypes = append(debugGpuTypes, pool.Pool...)
if pool.ResourceSpec != nil {
ctx.Data["resource_specs"] = pool.ResourceSpec
}
} else if jobType == string(models.JobTypeTrain) {
trainGpuTypes = append(trainGpuTypes, pool.Pool...)
if pool.ResourceSpec != nil {
ctx.Data["train_resource_specs"] = pool.ResourceSpec
}
}
}
break
}
}

}

if len(debugGpuTypes) > 0 {
ctx.Data["gpu_types"] = debugGpuTypes
}

if len(trainGpuTypes) > 0 {
ctx.Data["train_gpu_types"] = trainGpuTypes
}

}

ctx.Data["params"] = ""
ctx.Data["branchName"] = ctx.Repo.BranchName

@@ -218,6 +259,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
repo := ctx.Repo.Repository
tpl := tplCloudBrainNew

if jobType == string(models.JobTypeTrain) {
tpl = tplCloudBrainTrainJobNew
}

tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
if err == nil {
if len(tasks) != 0 {
@@ -283,6 +328,14 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
command = commandTrain
}

errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId)

if errStr != "" {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(errStr, tpl, &form)
return
}

if branchName == "" {
branchName = cloudbrain.DefaultBranchName
}
@@ -335,6 +388,42 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
}
}

// checkCloudBrainSpecialPool verifies that the parameters submitted by the user
// are allowed with respect to the configured special (dedicated) resource pools.
//
// It returns an empty string when the check passes: no special pools are
// configured, no pool matches the given job type, queue and resource spec, or
// the user is a member of the organization of a matching pool. It returns the
// translated "repo.grampus.no_operate_right" message when at least one pool
// matches but the user is not a member of any matching pool's organization.
//
// Errors from the organization lookup and the membership check are ignored; a
// failed lookup is treated like a missing organization and a failed membership
// check like a non-member.
func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string {
	// No special pools configured: nothing to enforce.
	if cloudbrain.SpecialPools == nil {
		return ""
	}

	matched := false
	for _, candidate := range cloudbrain.SpecialPools.Pools {
		if !cloudbrain.IsElementExist(candidate.JobType, jobType) {
			continue
		}
		if !cloudbrain.IsQueueInSpecialtPool(candidate.Pool, queue) {
			continue
		}
		if !cloudbrain.IsResourceSpecInSpecialPool(candidate.ResourceSpec, resourceSpecId) {
			continue
		}

		matched = true
		org, _ := models.GetOrgByName(candidate.Org)
		if org == nil {
			continue
		}
		if member, _ := models.IsOrganizationMember(org.ID, ctx.User.ID); member {
			// The parameters match a special pool the user belongs to: check passes.
			return ""
		}
	}

	// A pool matched but the user is not in the corresponding organization.
	// The UI already filters the selectable options, so normal UI operation
	// should not reach this branch.
	if matched {
		return ctx.Tr("repo.grampus.no_operate_right")
	}

	// No pool matched: check passes.
	return ""
}

func CloudBrainRestart(ctx *context.Context) {
var ID = ctx.Params(":id")
var resultCode = "0"


Loading…
Cancel
Save