From 4616b9be8d8a30c7f6b708e12d90df6a655cdc74 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 9 Dec 2022 14:22:16 +0800 Subject: [PATCH 01/77] #3277 gcu:create notebook --- models/cloudbrain.go | 12 ++++++++++-- modules/grampus/grampus.go | 1 + routers/repo/grampus.go | 20 ++++++++++++++++++-- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 57e0f825a..b2b24bacc 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -35,6 +35,7 @@ const ( const ( NPUResource = "NPU" GPUResource = "CPU/GPU" + GCUResource = "GCU" AllResource = "all" //notebook storage category @@ -134,6 +135,7 @@ const ( //ComputeResource GPU = "GPU" NPU = "NPU" + GCU = "GCU" ) type Cloudbrain struct { @@ -205,7 +207,7 @@ type Cloudbrain struct { BenchmarkTypeRankLink string `xorm:"-"` StartTime timeutil.TimeStamp EndTime timeutil.TimeStamp - Cleared bool `xorm:"DEFAULT false"` + Cleared bool `xorm:"DEFAULT false"` Spec *Specification `xorm:"-"` } @@ -2589,8 +2591,14 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn if len(grampusType) > 0 { if grampusType[0] == GPU { dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID - } else { + } else if grampusType[0] == NPU { dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } else if grampusType[0] == GCU { + if attach.Type == TypeCloudBrainOne { + dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } else { + dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + } } } else { diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 5854ba051..169bf5625 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -18,6 +18,7 @@ const ( ProcessorTypeNPU = "npu.huawei.com/NPU" ProcessorTypeGPU = "nvidia.com/gpu" + ProcessorTypeGCU = "enflame-tech.com/gcu" GpuWorkDir = "/tmp/" NpuWorkDir = "/cache/" diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 3011ccd79..4655a5dc6 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -53,6 +53,8 @@ const ( //NPU tplGrampusNotebookNPUNew base.TplName = "repo/grampus/notebook/npu/new" tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new" + //GCU + tplGrampusNotebookGCUNew base.TplName = "repo/grampus/notebook/gcu/new" ) func GrampusNotebookNew(ctx *context.Context) { @@ -61,6 +63,8 @@ func GrampusNotebookNew(ctx *context.Context) { processType := grampus.ProcessorTypeGPU if notebookType == 1 { processType = grampus.ProcessorTypeNPU + } else if notebookType == 2 { + processType = grampus.ProcessorTypeGCU } err := grampusNotebookNewDataPrepare(ctx, processType) if err != nil { @@ -69,8 +73,10 @@ func GrampusNotebookNew(ctx *context.Context) { } if processType == grampus.ProcessorTypeGPU { ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew) - } else { + } else if processType == grampus.ProcessorTypeNPU { ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew) + } else if processType == grampus.ProcessorTypeGCU { + ctx.HTML(http.StatusOK, tplGrampusNotebookGCUNew) } } @@ -118,6 +124,12 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook computeSource = models.NPUResource computeSourceSimple = models.NPU codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath + } else if form.Type == 2 { + tpl = tplGrampusNotebookGCUNew + processType = grampus.ProcessorTypeGCU + computeSource = models.GCUResource + computeSourceSimple = models.GCU + codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath } lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) @@ -295,7 +307,7 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["display_job_name"] = displayJobName //get valid images - if processType == grampus.ProcessorTypeNPU { + if processType == grampus.ProcessorTypeNPU || processType == grampus.ProcessorTypeGCU { images, err := grampus.GetImages(processType, string(models.JobTypeDebug)) if err != nil { log.Error("GetImages failed:", err.Error()) @@ -311,6 +323,10 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err computeResourceSimple = models.NPU datasetType = models.TypeCloudBrainTwo computeResource = models.NPUResource + } else if processType == grampus.ProcessorTypeGCU { + computeResourceSimple = models.GCU + datasetType = models.TypeCloudBrainAll + computeResource = models.GCUResource } prepareGrampusSpecs(ctx, computeResourceSimple, models.JobTypeDebug) From 0be5c00ca82538f5a14847fde6513403e61459b4 Mon Sep 17 00:00:00 2001 From: chenshihai Date: Mon, 12 Dec 2022 10:14:02 +0800 Subject: [PATCH 02/77] gcu --- templates/repo/cloudbrain/new.tmpl | 2 +- templates/repo/grampus/notebook/gcu/new.tmpl | 229 +++++++++++++++++++++++++++ templates/repo/grampus/notebook/gpu/new.tmpl | 10 +- templates/repo/grampus/notebook/npu/new.tmpl | 8 +- templates/repo/modelarts/notebook/new.tmpl | 2 +- web_src/js/standalone/specsuse.js | 4 +- web_src/vuepages/const/index.js | 4 +- 7 files changed, 251 insertions(+), 8 deletions(-) create mode 100644 templates/repo/grampus/notebook/gcu/new.tmpl diff --git a/templates/repo/cloudbrain/new.tmpl b/templates/repo/cloudbrain/new.tmpl index c5eaaa1ff..13a0a9676 100755 --- a/templates/repo/cloudbrain/new.tmpl +++ b/templates/repo/cloudbrain/new.tmpl @@ -184,7 +184,7 @@ {{.i18n.Tr "custom.resource_description"}} {{if .CloudBrainPaySwitch}} -
+
{{$.i18n.Tr "points.balance_of_points"}}{{.PointAccount.Balance}}{{$.i18n.Tr "points.points"}}{{$.i18n.Tr "points.expected_time"}}{{$.i18n.Tr "points.hours"}} diff --git a/templates/repo/grampus/notebook/gcu/new.tmpl b/templates/repo/grampus/notebook/gcu/new.tmpl new file mode 100644 index 000000000..0c6c77b96 --- /dev/null +++ b/templates/repo/grampus/notebook/gcu/new.tmpl @@ -0,0 +1,229 @@ +{{template "base/head" .}} +{{template "custom/global_mask" .}} +
+ {{template "repo/header" .}} +
+ + {{if eq .NotStopTaskCount 0}} + {{template "base/alert" .}} + {{end}} + +

+ {{.i18n.Tr "repo.modelarts.train_job.new_debug"}} +

+ {{template "custom/alert_cb" .}} +
+
+ + + {{.CsrfTokenHtml}} +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+ +
+ + +
+
+ + {{template "custom/task_wait_count" .}} +
+
+ + +
+ {{.i18n.Tr "repo.cloudbrain_jobname_err"}} +
+ + {{if .description}} + + {{else}} + + {{end}} +
+
+
+ + +
+ {{template "custom/select_model" .}} + +
+ + +
+ +
+ +
+ +
+ + + + {{.i18n.Tr "custom.resource_description"}} + {{if .CloudBrainPaySwitch}} +
+ {{$.i18n.Tr "points.balance_of_points"}}{{.PointAccount.Balance}}{{$.i18n.Tr "points.points"}}{{$.i18n.Tr "points.expected_time"}}{{$.i18n.Tr "points.hours"}} + + + {{$.i18n.Tr "points.points_acquisition_instructions"}} + +
+ {{end}} +
+
+ + + {{.i18n.Tr "repo.cloudbrain.cancel"}} +
+
+
+ + +
+
+{{template "base/footer" .}} + + diff --git a/templates/repo/grampus/notebook/gpu/new.tmpl b/templates/repo/grampus/notebook/gpu/new.tmpl index 943bdf60e..de4488daa 100644 --- a/templates/repo/grampus/notebook/gpu/new.tmpl +++ b/templates/repo/grampus/notebook/gpu/new.tmpl @@ -55,6 +55,14 @@ d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" /> Ascend NPU + + + + + + GCU
@@ -120,7 +128,7 @@ {{.i18n.Tr "custom.resource_description"}} {{if .CloudBrainPaySwitch}} -
+
{{$.i18n.Tr "points.balance_of_points"}}{{.PointAccount.Balance}}{{$.i18n.Tr "points.points"}}{{$.i18n.Tr "points.expected_time"}}{{$.i18n.Tr "points.hours"}} diff --git a/templates/repo/grampus/notebook/npu/new.tmpl b/templates/repo/grampus/notebook/npu/new.tmpl index 986a71696..2cfb235cc 100644 --- a/templates/repo/grampus/notebook/npu/new.tmpl +++ b/templates/repo/grampus/notebook/npu/new.tmpl @@ -48,6 +48,12 @@ Ascend NPU + + + + + + GCU
@@ -111,7 +117,7 @@ {{.i18n.Tr "custom.resource_description"}} {{if .CloudBrainPaySwitch}} -
+
{{$.i18n.Tr "points.balance_of_points"}}{{.PointAccount.Balance}}{{$.i18n.Tr "points.points"}}{{$.i18n.Tr "points.expected_time"}}{{$.i18n.Tr "points.hours"}} diff --git a/templates/repo/modelarts/notebook/new.tmpl b/templates/repo/modelarts/notebook/new.tmpl index 34d9a4cc6..6a5608193 100755 --- a/templates/repo/modelarts/notebook/new.tmpl +++ b/templates/repo/modelarts/notebook/new.tmpl @@ -94,7 +94,7 @@ {{.i18n.Tr "custom.resource_description"}} {{if .CloudBrainPaySwitch}} -
+
{{$.i18n.Tr "points.balance_of_points"}}{{.PointAccount.Balance}}{{$.i18n.Tr "points.points"}}{{$.i18n.Tr "points.expected_time"}}{{$.i18n.Tr "points.hours"}} diff --git a/web_src/js/standalone/specsuse.js b/web_src/js/standalone/specsuse.js index 2154c1b34..d9f313581 100644 --- a/web_src/js/standalone/specsuse.js +++ b/web_src/js/standalone/specsuse.js @@ -1,11 +1,11 @@ -window.ACC_CARD_TYPE = [{ k: 'T4', v: 'T4' }, { k: 'A100', v: 'A100' }, { k: 'V100', v: 'V100' }, { k: 'ASCEND910', v: 'Ascend 910' }, { k: 'MLU270', v: 'MLU270' }, { k: 'RTX3080', v: 'RTX3080' }]; +window.ACC_CARD_TYPE = [{ k: 'T4', v: 'T4' }, { k: 'A100', v: 'A100' }, { k: 'V100', v: 'V100' }, { k: 'ASCEND910', v: 'Ascend 910' }, { k: 'MLU270', v: 'MLU270' }, { k: 'RTX3080', v: 'RTX3080' }, { k: 'ENFLAME-T20', v: 'ENFLAME-T20' }]; window.getListValueWithKey = (list, key, k = 'k', v = 'v', defaultV = '') => { for (let i = 0, iLen = list.length; i < iLen; i++) { const listI = list[i]; if (listI[k] === key) return listI[v]; } - return defaultV; + return defaultV || key; }; window.renderSpecStr = (spec, showPoint, langObj) => { diff --git a/web_src/vuepages/const/index.js b/web_src/vuepages/const/index.js index 7ca6326f0..9a03f3d97 100644 --- a/web_src/vuepages/const/index.js +++ b/web_src/vuepages/const/index.js @@ -11,8 +11,8 @@ export const JOB_TYPE = [{ k: 'DEBUG', v: i18n.t('debugTask') }, { k: 'TRAIN', v // 资源管理 export const CLUSTERS = [{ k: 'OpenI', v: i18n.t('resourcesManagement.OpenI') }, { k: 'C2Net', v: i18n.t('resourcesManagement.C2Net') }]; export const AI_CENTER = [{ k: 'OpenIOne', v: i18n.t('resourcesManagement.OpenIOne') }, { k: 'OpenITwo', v: i18n.t('resourcesManagement.OpenITwo') }, { k: 'OpenIChengdu', v: i18n.t('resourcesManagement.OpenIChengdu') }, { k: 'pclcci', v: i18n.t('resourcesManagement.pclcci') }, { k: 'hefei', v: i18n.t('resourcesManagement.hefeiCenter') }, { k: 'xuchang', v: i18n.t('resourcesManagement.xuchangCenter') }]; -export const COMPUTER_RESOURCES = [{ k: 'GPU', v: 'GPU' }, { k: 'NPU', v: 'NPU' }, { k: 'MLU', v: 'MLU' }]; -export const ACC_CARD_TYPE = [{ k: 'T4', v: 'T4' }, { k: 'A100', v: 'A100' }, { k: 'V100', v: 'V100' }, { k: 'ASCEND910', v: 'Ascend 910' }, { k: 'MLU270', v: 'MLU270' }, { k: 'RTX3080', v: 'RTX3080' }]; +export const COMPUTER_RESOURCES = [{ k: 'GPU', v: 'GPU' }, { k: 'NPU', v: 'NPU' }, { k: 'MLU', v: 'MLU' }, { k: 'GCU', v: 'GCU' }]; +export const ACC_CARD_TYPE = [{ k: 'T4', v: 'T4' }, { k: 'A100', v: 'A100' }, { k: 'V100', v: 'V100' }, { k: 'ASCEND910', v: 'Ascend 910' }, { k: 'MLU270', v: 'MLU270' }, { k: 'RTX3080', v: 'RTX3080' }, { k: 'ENFLAME-T20', v: 'ENFLAME-T20' }]; export const SPECIFICATION_STATUS = [{ k: '1', v: i18n.t('resourcesManagement.willOnShelf') }, { k: '2', v: i18n.t('resourcesManagement.onShelf') }, { k: '3', v: i18n.t('resourcesManagement.offShelf') }]; // 模型 From 400250996885069004c7ba2e3cb713670e203f44 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 12 Dec 2022 14:15:13 +0800 Subject: [PATCH 03/77] #3277 gcu:fix bug --- models/action.go | 1 + models/task_config.go | 1 + modules/grampus/grampus.go | 5 ++++- routers/repo/ai_model_manage.go | 2 +- routers/repo/grampus.go | 5 ++++- services/cloudbrain/cloudbrainTask/count.go | 5 +++++ 6 files changed, 16 insertions(+), 3 deletions(-) diff --git a/models/action.go b/models/action.go index 869acb762..247d4df0c 100755 --- a/models/action.go +++ b/models/action.go @@ -67,6 +67,7 @@ const ( ActionChangeUserAvatar //38 ActionCreateGrampusNPUDebugTask //39 ActionCreateGrampusGPUDebugTask //40 + ActionCreateGrampusGCUDebugTask //41 ) // Action represents user operation type and other information to diff --git a/models/task_config.go b/models/task_config.go index f86032fc9..14ca6b223 100644 --- a/models/task_config.go +++ b/models/task_config.go @@ -39,6 +39,7 @@ func GetTaskTypeFromAction(a ActionType) TaskType { ActionCreateGrampusGPUDebugTask, ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, + ActionCreateGrampusGCUDebugTask, ActionCreateGrampusGPUTrainTask: return TaskCreateCloudbrainTask case ActionCreateRepo: diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index ff03680da..2c28bda24 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -156,7 +156,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job var codeGrampus models.GrampusDataset var cpCommand string imageUrl := req.ImageUrl - if ProcessorTypeNPU == req.ProcessType { + if ProcessorTypeNPU == req.ProcessType || ProcessorTypeGCU == req.ProcessType { datasetGrampus = getDatasetGrampus(req.DatasetInfos) if len(req.ModelName) != 0 { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ @@ -225,6 +225,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, Command: req.Command, + CenterID: req.Spec.GetAvailableCenterIds(ctx.User.ID), }, }, }) @@ -273,6 +274,8 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job actionType = models.ActionCreateGrampusNPUDebugTask } else if req.ComputeResource == models.GPUResource { actionType = models.ActionCreateGrampusGPUDebugTask + } else if req.ComputeResource == models.GCUResource { + actionType = models.ActionCreateGrampusGCUDebugTask } notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType) diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 115842c30..eb25bd8c2 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -74,7 +74,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio cloudType := aiTask.Type modelSelectedFile := ctx.Query("modelSelectedFile") //download model zip //train type - if aiTask.ComputeResource == models.NPUResource { + if aiTask.ComputeResource == models.NPUResource || aiTask.ComputeResource == models.GCUResource { cloudType = models.TypeCloudBrainTwo } else if aiTask.ComputeResource == models.GPUResource { cloudType = models.TypeCloudBrainOne diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 4655a5dc6..643886a8e 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -288,7 +288,6 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook req.ModelVersion = form.ModelVersion req.PreTrainModelUrl = form.PreTrainModelUrl req.PreTrainModelPath = getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) - } _, err = grampus.GenerateNotebookJob(ctx, req) @@ -1660,7 +1659,11 @@ func GrampusNotebookRestart(ctx *context.Context) { if task.ComputeResource == models.NPUResource { computeSourceSimple = models.NPU action = models.ActionCreateGrampusNPUDebugTask + } else if task.ComputeResource == models.GCUResource { + computeSourceSimple = models.GCU + action = models.ActionCreateGrampusGCUDebugTask } + spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{ JobType: models.JobType(task.JobType), ComputeResource: computeSourceSimple, diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index 372afc7d9..5ebbba798 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -72,6 +72,11 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s JobType: []models.JobType{models.JobTypeDebug}, NotFinalStatuses: GrampusNotFinalStatuses, ComputeResource: models.NPUResource, +}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GCUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: GrampusNotFinalStatuses, + ComputeResource: models.GCUResource, }} func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { From 752efc0f7f1f112c0177fc8f976cbdc42aee59fe Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 12 Dec 2022 14:37:16 +0800 Subject: [PATCH 04/77] #3277 gcu:fix bug --- modules/grampus/grampus.go | 2 +- routers/repo/grampus.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 2c28bda24..bc9a35861 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -156,7 +156,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job var codeGrampus models.GrampusDataset var cpCommand string imageUrl := req.ImageUrl - if ProcessorTypeNPU == req.ProcessType || ProcessorTypeGCU == req.ProcessType { + if ProcessorTypeNPU == req.ProcessType { datasetGrampus = getDatasetGrampus(req.DatasetInfos) if len(req.ModelName) != 0 { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 643886a8e..783d5d915 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -228,7 +228,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook return } - if processType == grampus.ProcessorTypeGPU { + if processType == grampus.ProcessorTypeGPU || processType == grampus.ProcessorTypeGCU { if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusNotebookNewDataPrepare(ctx, processType) From 9693fb941b6131c381062aa8fd73221682f2f7c1 Mon Sep 17 00:00:00 2001 From: chenshihai Date: Mon, 12 Dec 2022 17:54:20 +0800 Subject: [PATCH 05/77] gcu --- templates/admin/cloudbrain/list.tmpl | 9 ++++++++- templates/repo/cloudbrain/new.tmpl | 4 ++++ templates/repo/grampus/notebook/gcu/new.tmpl | 4 ++++ templates/repo/grampus/notebook/gpu/new.tmpl | 5 +++++ templates/repo/grampus/notebook/npu/new.tmpl | 6 +++++- templates/user/dashboard/cloudbrains.tmpl | 9 ++++++++- web_src/vuepages/pages/reward/point/utils.js | 10 +++++++--- 7 files changed, 41 insertions(+), 6 deletions(-) diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl index 94f80c0fa..c7ae58a71 100755 --- a/templates/admin/cloudbrain/list.tmpl +++ b/templates/admin/cloudbrain/list.tmpl @@ -96,13 +96,20 @@ {{end}}
- {{if eq .JobType "DEBUG"}} + {{if and (eq .JobType "DEBUG") (eq .Cloudbrain.Type 0)}} {{.DisplayJobName}} + {{else if eq .JobType "DEBUG"}} + + {{.DisplayJobName}} + {{else if or (eq .JobType "SNN4IMAGENET") (eq .JobType "BRAINSCORE")}}
- {{if eq .JobType "DEBUG"}} + {{if and (eq .JobType "DEBUG") (eq .Cloudbrain.Type 0)}} {{.DisplayJobName}} + {{else if eq .JobType "DEBUG"}} + + {{.DisplayJobName}} + {{else if (eq .JobType "SNN4IMAGENET" "BRAINSCORE")}} { const cloudbrain = type === 'INCREASE' ? record.Action?.Cloudbrain : record.Cloudbrain; switch (cloudbrain?.JobType) { case 'DEBUG': - if (cloudbrain.ComputeResource === 'CPU/GPU') { - link += `/cloudbrain/${cloudbrain.ID}`; + if (cloudbrain.Type === 0) { + if (cloudbrain.ComputeResource === 'CPU/GPU') { + link += `/cloudbrain/${cloudbrain.ID}`; + } else { + link += `/modelarts/notebook/${cloudbrain.ID}`; + } } else { - link += `/modelarts/notebook/${cloudbrain.ID}`; + link += `/grampus/notebook/${cloudbrain.ID}`; } break; case 'TRAIN': From d7c317794f98ceca672cd107e8796e3157ed4169 Mon Sep 17 00:00:00 2001 From: chenshihai Date: Mon, 12 Dec 2022 18:40:28 +0800 Subject: [PATCH 06/77] gcu --- options/locale/locale_en-US.ini | 3 +++ options/locale/locale_zh-CN.ini | 3 +++ public/home/home.js | 11 ++++++++++- templates/user/dashboard/feeds.tmpl | 13 +++++++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 32510b266..d9e8a966e 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -3124,6 +3124,9 @@ reject_pull_request = `suggested changes for %s#%[2]s` upload_dataset=`upload dataset %s` task_gpudebugjob=`created CPU/GPU type debugging task %s` task_npudebugjob=`created NPU type debugging task %s` +task_c2entgpudebugjob=`created CPU/GPU type debugging task %s` +task_c2entnpudebugjob=`created NPU type debugging task %s` +task_c2entgcudebugjob=`created GCU type debugging task %s` task_nputrainjob=`created NPU training task %s` task_inferencejob=`created reasoning task %s` task_benchmark=`created profiling task %s` diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 2ad30ca38..d5b96b345 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -3142,6 +3142,9 @@ reject_pull_request=`建议变更 %s#%[2]s` upload_dataset=`上传了数据集文件 %s` task_gpudebugjob=`创建了CPU/GPU类型调试任务 %s` task_npudebugjob=`创建了NPU类型调试任务 %s` +task_c2entgpudebugjob=`创建了CPU/GPU类型调试任务 %s` +task_c2entnpudebugjob=`创建了NPU类型调试任务 %s` +task_c2entgcudebugjob=`创建了GCU类型调试任务 %s` task_nputrainjob=`创建了NPU类型训练任务 %s` task_inferencejob=`创建了推理任务 %s` task_benchmark=`创建了评测任务 %s` diff --git a/public/home/home.js b/public/home/home.js index aeb51b184..3f31c857f 100755 --- a/public/home/home.js +++ b/public/home/home.js @@ -243,7 +243,8 @@ document.onreadystatechange = function () { html += recordPrefix + actionName; html += " " + getRepotext(record) + "" } - else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){ + else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" + || record.OpType == "31" || record.OpType == "32" || record.OpType == "33" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } @@ -294,6 +295,8 @@ function getTaskLink(record){ re = re + "/cloudbrain/train-job/" + record.Content; }else if(record.OpType == 32 || record.OpType == 33){ re = re + "/grampus/train-job/" + record.Content; + }else if(record.OpType == 39 || record.OpType == 40 || record.OpType == 41){ + re = re + "/grampus/notebook/" + record.Content; } re = encodeURI(re); return re; @@ -451,6 +454,9 @@ var actionNameZH={ "35":"创建的数据集 {dataset} 被设置为推荐数据集", "36":"提交了镜像 {image}", "37":"提交的镜像 {image} 被设置为推荐镜像", + "39":"创建了CPU/GPU类型调试任务", + "40":"创建了NPU类型调试任务", + "41":"创建了GCU类型训练任务", }; var actionNameEN={ @@ -482,6 +488,9 @@ var actionNameEN={ "35":" created dataset {dataset} was set as recommended dataset", "36":"committed image {image}", "37":"committed image {image} was set as recommended image", + "39":" created CPU/GPU type debugging task ", + "40":" created NPU type debugging task ", + "41":" created GCU type debugging task ", }; var repoAndOrgZH={ diff --git a/templates/user/dashboard/feeds.tmpl b/templates/user/dashboard/feeds.tmpl index c2da66ff5..c07d8dee4 100755 --- a/templates/user/dashboard/feeds.tmpl +++ b/templates/user/dashboard/feeds.tmpl @@ -1,3 +1,4 @@ + {{range .Feeds}}
@@ -76,6 +77,12 @@ {{$.i18n.Tr "action.task_gpudebugjob" .GetRepoLink .Content .RefName | Str2html}} {{else if eq .GetOpType 26}} {{$.i18n.Tr "action.task_npudebugjob" .GetRepoLink .Content .RefName | Str2html}} + {{else if eq .GetOpType 39}} + {{$.i18n.Tr "action.task_c2entgpudebugjob" .GetRepoLink .Content .RefName | Str2html}} + {{else if eq .GetOpType 40}} + {{$.i18n.Tr "action.task_c2entnpudebugjob" .GetRepoLink .Content .RefName | Str2html}} + {{else if eq .GetOpType 41}} + {{$.i18n.Tr "action.task_c2entgcudebugjob" .GetRepoLink .Content .RefName | Str2html}} {{else if eq .GetOpType 27}} {{$.i18n.Tr "action.task_nputrainjob" .GetRepoLink .Content .RefName | Str2html}} {{else if eq .GetOpType 28}} @@ -129,6 +136,12 @@ {{else if eq .GetOpType 26}} + {{else if eq .GetOpType 39}} + + {{else if eq .GetOpType 40}} + + {{else if eq .GetOpType 41}} + {{else if eq .GetOpType 27}} {{else if eq .GetOpType 28}} From 142b0f83eb9ac31cb68ebd14319c437170b961f6 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 09:39:41 +0800 Subject: [PATCH 07/77] #3277 gcu:fix bug --- routers/repo/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 783d5d915..fb47ccf1d 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -129,7 +129,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook processType = grampus.ProcessorTypeGCU computeSource = models.GCUResource computeSourceSimple = models.GCU - codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath + codeStoragePath = setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" } lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) From 6a7df0460ba0b88fb9d3bfc005aeb86e04dae21a Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 09:51:38 +0800 Subject: [PATCH 08/77] #3277 gcu:fix bug --- modules/grampus/grampus.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 321abba9c..1d81df448 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -197,6 +197,9 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job ReadOnly: false, ContainerPath: cloudbrain.CodeMountPath, } + if ProcessorTypeGCU == req.ProcessType { + imageUrl = "" + } req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) log.Info("debug command:" + req.Command) From a908c679d26cedeedc730728f602cdcb16ab92ab Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 10:17:40 +0800 Subject: [PATCH 09/77] #3277 gcu:code unzip --- modules/grampus/grampus.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 1d81df448..d35b04149 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -27,10 +27,11 @@ const ( CodeArchiveName = "master.zip" - BucketRemote = "grampus" - RemoteModelPath = "/output/" + models.ModelSuffix - autoStopDurationMs = 4 * 60 * 60 * 1000 - CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" + BucketRemote = "grampus" + RemoteModelPath = "/output/" + models.ModelSuffix + autoStopDurationMs = 4 * 60 * 60 * 1000 + CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" + CommandGrampusDebug = CommandGpuDebug + "unzip %s;rm %s;" ) var ( @@ -189,18 +190,19 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job ContainerPath: cloudbrain.PretrainModelMountPath, }) } + codeArchiveName := cloudbrain.DefaultBranchName + ".zip" codeGrampus = models.GrampusDataset{ Name: req.CodeName, Bucket: setting.Attachment.Minio.Bucket, EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", + ObjectKey: req.CodeStoragePath + codeArchiveName, ReadOnly: false, ContainerPath: cloudbrain.CodeMountPath, } if ProcessorTypeGCU == req.ProcessType { imageUrl = "" } - req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) + req.Command = fmt.Sprintf(CommandGrampusDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval, codeArchiveName, codeArchiveName) log.Info("debug command:" + req.Command) } From eceb18eb10f500c5e5419d617cf01ed27509baf3 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 10:27:21 +0800 Subject: [PATCH 10/77] #3277 gcu:code unzip --- modules/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index d35b04149..176e9ee1e 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -202,7 +202,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job if ProcessorTypeGCU == req.ProcessType { imageUrl = "" } - req.Command = fmt.Sprintf(CommandGrampusDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval, codeArchiveName, codeArchiveName) + req.Command = fmt.Sprintf(CommandGrampusDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval, cloudbrain.CodeMountPath+"/"+codeArchiveName, cloudbrain.CodeMountPath+"/"+codeArchiveName) log.Info("debug command:" + req.Command) } From df23f3e3f7b9f61eee22e8acd8553bb9015042e6 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 10:40:38 +0800 Subject: [PATCH 11/77] #3277 gcu:code unzip --- modules/grampus/grampus.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 176e9ee1e..665a41123 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -31,7 +31,7 @@ const ( RemoteModelPath = "/output/" + models.ModelSuffix autoStopDurationMs = 4 * 60 * 60 * 1000 CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" - CommandGrampusDebug = CommandGpuDebug + "unzip %s;rm %s;" + CommandGrampusDebug = "FILE=%s\nif test -f \"$FILE\"; then\n unzip FILE\n rm FILE\nfi\n " + CommandGpuDebug ) var ( @@ -202,7 +202,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job if ProcessorTypeGCU == req.ProcessType { imageUrl = "" } - req.Command = fmt.Sprintf(CommandGrampusDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval, cloudbrain.CodeMountPath+"/"+codeArchiveName, cloudbrain.CodeMountPath+"/"+codeArchiveName) + req.Command = fmt.Sprintf(CommandGrampusDebug, cloudbrain.CodeMountPath+"/"+codeArchiveName, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) log.Info("debug command:" + req.Command) } From 896debae7d6a6dd0dd7906d5514f97463cf9a34c Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 11:10:18 +0800 Subject: [PATCH 12/77] #3277 gcu:code unzip --- modules/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 665a41123..4fa4c0d68 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -31,7 +31,7 @@ const ( RemoteModelPath = "/output/" + models.ModelSuffix autoStopDurationMs = 4 * 60 * 60 * 1000 CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" - CommandGrampusDebug = "FILE=%s\nif test -f \"$FILE\"; then\n unzip FILE\n rm FILE\nfi\n " + CommandGpuDebug + CommandGrampusDebug = "unzip %s;" + CommandGpuDebug ) var ( From 8b5e692cd340c042046ec457a7ed54cbd7c13dc3 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 11:24:06 +0800 Subject: [PATCH 13/77] #3277 gcu:code unzip --- modules/grampus/grampus.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 4fa4c0d68..99f0dc8ad 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -31,7 +31,7 @@ const ( RemoteModelPath = "/output/" + models.ModelSuffix autoStopDurationMs = 4 * 60 * 60 * 1000 CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" - CommandGrampusDebug = "unzip %s;" + CommandGpuDebug + CommandGrampusDebug = "unzip -d %s %s;rm %s" + CommandGpuDebug ) var ( @@ -202,7 +202,8 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job if ProcessorTypeGCU == req.ProcessType { imageUrl = "" } - req.Command = fmt.Sprintf(CommandGrampusDebug, cloudbrain.CodeMountPath+"/"+codeArchiveName, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) + codeArchiveContainerPath := cloudbrain.CodeMountPath + "/" + codeArchiveName + req.Command = fmt.Sprintf(CommandGrampusDebug, cloudbrain.CodeMountPath, codeArchiveContainerPath, codeArchiveContainerPath, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) log.Info("debug command:" + req.Command) } From 98c1512207cc21ee5ec68266122fd6512b00c7db Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 11:30:01 +0800 Subject: [PATCH 14/77] #3277 gcu:update command --- modules/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 99f0dc8ad..f633674a3 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -31,7 +31,7 @@ const ( RemoteModelPath = "/output/" + models.ModelSuffix autoStopDurationMs = 4 * 60 * 60 * 1000 CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" - CommandGrampusDebug = "unzip -d %s %s;rm %s" + CommandGpuDebug + CommandGrampusDebug = "unzip -d %s %s;rm %s;" + CommandGpuDebug ) var ( From e07f4315beca33968fc04d9e4d38260c85909571 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 11:44:22 +0800 Subject: [PATCH 15/77] #3277 gcu:update --- models/cloudbrain.go | 13 ++++++++----- routers/repo/grampus.go | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 4a010ffd6..a426cdc56 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -304,6 +304,9 @@ func (task *Cloudbrain) IsUserHasRight(user *User) bool { func (task *Cloudbrain) IsGPUTask() bool { return task.ComputeResource == GPUResource } +func (task *Cloudbrain) IsGCUTask() bool { + return task.ComputeResource == GCUResource +} func (task *Cloudbrain) IsNPUTask() bool { return task.ComputeResource == NPUResource } @@ -1974,7 +1977,7 @@ func GetCloudbrainByID(id string) (*Cloudbrain, error) { return getRepoCloudBrain(cb) } -func IsCloudbrainExistByJobName(jobName string)(bool,error){ +func IsCloudbrainExistByJobName(jobName string) (bool, error) { return x.Unscoped().Exist(&Cloudbrain{ JobName: jobName, }) @@ -2138,25 +2141,25 @@ func GetCloudBrainOneStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbra Limit(limit). Find(&cloudbrains) } + /** 本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间 - */ +*/ func GetCloudBrainOneStoppedDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) endTimeBefore := time.Now().Unix() - int64(days)*24*3600 missEndTimeBefore := endTimeBefore - 24*3600 - sql:=`SELECT id,job_name,job_id from (SELECT DISTINCT ON (job_name) + sql := `SELECT id,job_name,job_id from (SELECT DISTINCT ON (job_name) id, job_name, job_id,status,end_time,updated_unix,cleared FROM cloudbrain where type=0 and job_type='DEBUG' ORDER BY job_name, updated_unix DESC) a where status in ('STOPPED','SUCCEEDED','FAILED') and (((end_time is null or end_time=0) and updated_unix Date: Tue, 13 Dec 2022 14:21:23 +0800 Subject: [PATCH 16/77] #3277 gcu:fix bug of dataset error --- models/cloudbrain.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index a426cdc56..4f8fb4c42 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -2622,9 +2622,9 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" } else if grampusType[0] == GCU { if attach.Type == TypeCloudBrainOne { - dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" - } else { dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + } else { + dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" } } From bd6daf45590760e9ef616418740d304cf390b8f7 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 15:20:45 +0800 Subject: [PATCH 17/77] #3277 gcu:fix bug of npu dataset error --- models/cloudbrain.go | 2 ++ modules/grampus/grampus.go | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 4f8fb4c42..851ab2456 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -2579,6 +2579,7 @@ type DatasetInfo struct { DataLocalPath string Name string FullName string + Type int } func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { @@ -2640,6 +2641,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn DataLocalPath: dataLocalPath, Name: fileName, FullName: attach.Name, + Type: attach.Type, } if i == 0 { datasetNames = attach.Name diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index f633674a3..11d4b253c 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -149,6 +149,35 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. } return datasetGrampus, command } +func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models.GrampusDataset, string) { + var datasetGrampus []models.GrampusDataset + var command = "" + epuEndPoint := getEndPoint() + for uuid, datasetInfo := range datasetInfos { + if datasetInfo.Type == models.TypeCloudBrainOne { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: datasetInfo.DataLocalPath, + ReadOnly: true, + ContainerPath: "/dataset1/" + datasetInfo.Name, + }) + + command += "cp /dataset1/'" + datasetInfo.Name + "'/" + uuid + " /dataset/'" + datasetInfo.FullName + "';" + + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Bucket, + EndPoint: epuEndPoint, + ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, + }) + } + + } + return datasetGrampus, command +} func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() @@ -179,7 +208,11 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job imageUrl = "" req.Command = "" } else { - datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + if ProcessorTypeGCU == req.ProcessType { + datasetGrampus, cpCommand = getDatasetGCUGrampus(req.DatasetInfos) + } else { + datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + } if len(req.ModelName) != 0 { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ Name: req.ModelName, From b30d5dfccc093fcf5f8d69b0727091d91e96580a Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 15:42:15 +0800 Subject: [PATCH 18/77] #3277 gcu:fix bug of npu dataset error --- modules/grampus/grampus.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 11d4b253c..ba36bd5cc 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -168,10 +168,11 @@ func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. } else { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ - Name: datasetInfo.FullName, - Bucket: setting.Bucket, - EndPoint: epuEndPoint, - ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, + Name: datasetInfo.FullName, + Bucket: setting.Bucket, + EndPoint: epuEndPoint, + ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, + ContainerPath: "/dataset1/" + datasetInfo.Name, }) } From 3f44696a411850f1bc07b80878d321861c6a2d58 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 13 Dec 2022 15:53:07 +0800 Subject: [PATCH 19/77] #3277 gcu:add command of gcu dataset --- modules/grampus/grampus.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index ba36bd5cc..fbf240479 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -152,7 +152,7 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models.GrampusDataset, string) { var datasetGrampus []models.GrampusDataset var command = "" - epuEndPoint := getEndPoint() + obsEndPoint := getEndPoint() for uuid, datasetInfo := range datasetInfos { if datasetInfo.Type == models.TypeCloudBrainOne { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ @@ -170,10 +170,11 @@ func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. datasetGrampus = append(datasetGrampus, models.GrampusDataset{ Name: datasetInfo.FullName, Bucket: setting.Bucket, - EndPoint: epuEndPoint, + EndPoint: obsEndPoint, ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, ContainerPath: "/dataset1/" + datasetInfo.Name, }) + command += "cp /dataset1/'" + datasetInfo.Name + "'/" + uuid + " /dataset/'" + datasetInfo.FullName + "';" } } From 3258d9e7f3e6868ec05385b0394be0bb9baea6ad Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 15 Dec 2022 09:45:49 +0800 Subject: [PATCH 20/77] #3277 gcu:update --- modules/grampus/grampus.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index fbf240479..5f3bd07dd 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -172,9 +172,8 @@ func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. Bucket: setting.Bucket, EndPoint: obsEndPoint, ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, - ContainerPath: "/dataset1/" + datasetInfo.Name, + ContainerPath: "/dataset/" + datasetInfo.Name, }) - command += "cp /dataset1/'" + datasetInfo.Name + "'/" + uuid + " /dataset/'" + datasetInfo.FullName + "';" } } From 64d010af63a42b546e707ba5c8e5aad84aef82ea Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 15 Dec 2022 10:55:31 +0800 Subject: [PATCH 21/77] #3277 gcu:update model selector --- modules/grampus/grampus.go | 28 ++++++++++++++++++++-------- routers/repo/grampus.go | 3 ++- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 5f3bd07dd..c55fd58ea 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -109,6 +109,7 @@ type GenerateNotebookJobReq struct { Spec *models.Specification CodeName string ModelPath string //参考启智GPU调试, 挂载/model目录用户的模型可以输出到这个目录 + ModelStorageType int } func getEndPoint() string { @@ -215,14 +216,25 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) } if len(req.ModelName) != 0 { - datasetGrampus = append(datasetGrampus, models.GrampusDataset{ - Name: req.ModelName, - Bucket: setting.Attachment.Minio.Bucket, - EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.PreTrainModelPath, - ReadOnly: true, - ContainerPath: cloudbrain.PretrainModelMountPath, - }) + if req.ModelStorageType == models.TypeCloudBrainOne { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: req.PreTrainModelPath, + ReadOnly: true, + ContainerPath: cloudbrain.PretrainModelMountPath, + }) + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Bucket, + EndPoint: getEndPoint(), + ReadOnly: true, + ObjectKey: req.PreTrainModelPath, + }) + } + } codeArchiveName := cloudbrain.DefaultBranchName + ".zip" codeGrampus = models.GrampusDataset{ diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 673960fa1..5264f3a43 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -267,7 +267,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook if form.ModelName != "" { //使用预训练模型训练 - _, err := models.QueryModelByPath(form.PreTrainModelUrl) + m, err := models.QueryModelByPath(form.PreTrainModelUrl) if err != nil { log.Error("Can not find model", err) grampusNotebookNewDataPrepare(ctx, processType) @@ -280,6 +280,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook req.ModelVersion = form.ModelVersion req.PreTrainModelUrl = form.PreTrainModelUrl req.PreTrainModelPath = getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) + req.ModelStorageType = m.Type } _, err = grampus.GenerateNotebookJob(ctx, req) From f7f4935564798ad64986fb47a89ea2bcbc814aaa Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 15 Dec 2022 11:10:53 +0800 Subject: [PATCH 22/77] #3277 gcu:update model selector --- modules/grampus/grampus.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index c55fd58ea..d82dd018f 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -227,11 +227,12 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job }) } else { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ - Name: req.ModelName, - Bucket: setting.Bucket, - EndPoint: getEndPoint(), - ReadOnly: true, - ObjectKey: req.PreTrainModelPath, + Name: req.ModelName, + Bucket: setting.Bucket, + EndPoint: getEndPoint(), + ReadOnly: true, + ObjectKey: req.PreTrainModelPath, + ContainerPath: cloudbrain.PretrainModelMountPath, }) } From be3cd0bbaaf9e4ff83eba04d868ba9f1d9c111a4 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 15 Dec 2022 14:19:58 +0800 Subject: [PATCH 23/77] #3277 gcu:add gcu task to home action --- services/socketwrap/clientManager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go index 7470b1198..0c3d9d1a2 100755 --- a/services/socketwrap/clientManager.go +++ b/services/socketwrap/clientManager.go @@ -10,7 +10,7 @@ import ( "github.com/elliotchance/orderedmap" ) -var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35} +var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 41} type ClientsManager struct { Clients *orderedmap.OrderedMap From 9b22ed316bd01139800d2254cda50af0a4841e13 Mon Sep 17 00:00:00 2001 From: chenshihai Date: Thu, 15 Dec 2022 15:09:27 +0800 Subject: [PATCH 24/77] gcu --- templates/admin/cloudbrain/list.tmpl | 8 ++++---- templates/admin/cloudbrain/search.tmpl | 1 + templates/admin/cloudbrain/search_dashboard.tmpl | 1 + templates/user/dashboard/cloudbrains.tmpl | 8 ++++---- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl index c7ae58a71..a2c714de5 100755 --- a/templates/admin/cloudbrain/list.tmpl +++ b/templates/admin/cloudbrain/list.tmpl @@ -241,7 +241,7 @@ + data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/'> {{$.i18n.Tr "repo.debug"}} {{else}} @@ -249,7 +249,7 @@ + data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/'> {{$.i18n.Tr "repo.debug_again"}} {{end}} @@ -275,7 +275,7 @@ {{$.CsrfTokenHtml}} {{$.i18n.Tr "repo.stop"}} @@ -312,7 +312,7 @@ {{else}}
{{$.CsrfTokenHtml}} {{.i18n.Tr "admin.cloudbrain.all_computing_resources"}} CPU/GPU NPU + GCU