From 187265cb3bc72667a2d9e361984f417e35243513 Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Wed, 11 Aug 2021 16:05:59 +0800 Subject: [PATCH] add resource spec select --- models/cloudbrain.go | 22 +++++++++++++++++----- modules/auth/cloudbrain.go | 1 + modules/cloudbrain/cloudbrain.go | 27 ++++++++++++++++++++++----- modules/setting/setting.go | 4 +++- routers/repo/cloudbrain.go | 8 +++++++- templates/repo/cloudbrain/new.tmpl | 9 +++++++++ 6 files changed, 59 insertions(+), 12 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index f9c0fb4e0..7a2966f74 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -72,11 +72,11 @@ type CloudBrainLoginResult struct { type TaskRole struct { Name string `json:"name"` - TaskNumber int8 `json:"taskNumber"` - MinSucceededTaskCount int8 `json:"minSucceededTaskCount"` - MinFailedTaskCount int8 `json:"minFailedTaskCount"` - CPUNumber int8 `json:"cpuNumber"` - GPUNumber int8 `json:"gpuNumber"` + TaskNumber int `json:"taskNumber"` + MinSucceededTaskCount int `json:"minSucceededTaskCount"` + MinFailedTaskCount int `json:"minFailedTaskCount"` + CPUNumber int `json:"cpuNumber"` + GPUNumber int `json:"gpuNumber"` MemoryMB int `json:"memoryMB"` ShmMB int `json:"shmMB"` Command string `json:"command"` @@ -286,6 +286,18 @@ type GpuInfo struct { Queue string `json:"queue"` } +type ResourceSpecs struct { + ResourceSpec []*ResourceSpec `json:"resorce_specs"` +} + +type ResourceSpec struct { + Id int `json:"id"` + CpuNum int `json:"cpu"` + GpuNum int `json:"gpu"` + MemMiB int `json:"memMiB"` + ShareMemMiB int `json:"shareMemMiB"` +} + type CommitImageParams struct { Ip string `json:"ip"` TaskContainerId string `json:"taskContainerId"` diff --git a/modules/auth/cloudbrain.go b/modules/auth/cloudbrain.go index 8325dc063..d598b495c 100755 --- a/modules/auth/cloudbrain.go +++ b/modules/auth/cloudbrain.go @@ -13,6 +13,7 @@ type CreateCloudBrainForm struct { JobType string `form:"job_type" binding:"Required"` BenchmarkCategory string `form:"get_benchmark_category"` GpuType string `form:"gpu_type"` + ResourceSpecId int `form:"resource_spec_id" binding:"Required"` } type CommitImageCloudBrainForm struct { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 9fda84ee8..0de1db9a6 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -23,12 +23,29 @@ const ( Success = "S000" ) -func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, jobType, gpuQueue string) error { +var ( + ResourceSpecs *models.ResourceSpecs +) + +func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, jobType, gpuQueue string, resourceSpecId int) error { dataActualPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.BasePath + models.AttachmentRelativePath(uuid) + uuid + + var resourceSpec *models.ResourceSpec + for _, spec := range ResourceSpecs.ResourceSpec { + if resourceSpecId == spec.Id { + resourceSpec = spec + } + } + + if resourceSpec == nil { + log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"]) + return errors.New("no such resourceSpec") + } + jobResult, err := CreateJob(jobName, models.CreateJobParams{ JobName: jobName, RetryCount: 1, @@ -40,10 +57,10 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, TaskNumber: 1, MinSucceededTaskCount: 1, MinFailedTaskCount: 1, - CPUNumber: 2, - GPUNumber: 1, - MemoryMB: 16384, - ShmMB: 8192, + CPUNumber: resourceSpec.CpuNum, + GPUNumber: resourceSpec.GpuNum, + MemoryMB: resourceSpec.MemMiB, + ShmMB: resourceSpec.ShareMemMiB, Command: command, NeedIBDevice: false, IsMainRole: false, diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 7785028f8..986a18313 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -439,6 +439,7 @@ var ( JobType string GpuTypes string DebugServerHost string + ResourceSpecs string //benchmark config IsBenchmarkEnabled bool @@ -1147,7 +1148,8 @@ func NewContext() { JobPath = sec.Key("JOB_PATH").MustString("/datasets/minio/data/opendata/jobs/") DebugServerHost = sec.Key("DEBUG_SERVER_HOST").MustString("http://192.168.202.73") JobType = sec.Key("GPU_TYPE_DEFAULT").MustString("openidebug") - GpuTypes = sec.Key("GPU_TYPES").MustString("openidebug,openidgx") + GpuTypes = sec.Key("GPU_TYPES").MustString("") + ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("") sec = Cfg.Section("benchmark") IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 1088f47d3..f6f796e14 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -148,6 +148,11 @@ func CloudBrainNew(ctx *context.Context) { json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) } ctx.Data["gpu_types"] = gpuInfos.GpuInfo + + if cloudbrain.ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) + } + ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled ctx.HTML(200, tplCloudBrainNew) @@ -162,6 +167,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { jobType := form.JobType gpuQueue := setting.JobType codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath + resourceSpecId := form.ResourceSpecId if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeSnn4imagenet) { log.Error("jobtype error:", jobType, ctx.Data["msgID"]) @@ -208,7 +214,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { downloadRateCode(repo, jobName, setting.Snn4imagenetCode, snn4imagenetPath, "", "") } - err = cloudbrain.GenerateTask(ctx, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, jobType, gpuQueue) + err = cloudbrain.GenerateTask(ctx, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, jobType, gpuQueue, resourceSpecId) if err != nil { ctx.RenderWithErr(err.Error(), tplCloudBrainNew, &form) return diff --git a/templates/repo/cloudbrain/new.tmpl b/templates/repo/cloudbrain/new.tmpl index e9b2f2732..4e533091e 100755 --- a/templates/repo/cloudbrain/new.tmpl +++ b/templates/repo/cloudbrain/new.tmpl @@ -182,6 +182,15 @@