diff --git a/models/action.go b/models/action.go index 869acb762..6d93fbcd3 100755 --- a/models/action.go +++ b/models/action.go @@ -67,6 +67,7 @@ const ( ActionChangeUserAvatar //38 ActionCreateGrampusNPUDebugTask //39 ActionCreateGrampusGPUDebugTask //40 + ActionCreateGrampusGCUDebugTask //41 ) // Action represents user operation type and other information to @@ -380,7 +381,8 @@ func (a *Action) IsCloudbrainAction() bool { ActionCreateGrampusGPUDebugTask, ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, - ActionCreateGrampusGPUTrainTask: + ActionCreateGrampusGPUTrainTask, + ActionCreateGrampusGCUDebugTask: return true } return false diff --git a/models/cloudbrain.go b/models/cloudbrain.go index c721e473c..83864bff8 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -35,6 +35,7 @@ const ( const ( NPUResource = "NPU" GPUResource = "CPU/GPU" + GCUResource = "GCU" AllResource = "all" //notebook storage category @@ -135,6 +136,7 @@ const ( //ComputeResource GPU = "GPU" NPU = "NPU" + GCU = "GCU" ) type Cloudbrain struct { @@ -303,6 +305,9 @@ func (task *Cloudbrain) IsUserHasRight(user *User) bool { func (task *Cloudbrain) IsGPUTask() bool { return task.ComputeResource == GPUResource } +func (task *Cloudbrain) IsGCUTask() bool { + return task.ComputeResource == GCUResource +} func (task *Cloudbrain) IsNPUTask() bool { return task.ComputeResource == NPUResource } @@ -2623,6 +2628,7 @@ type DatasetInfo struct { Name string FullName string Size int + Type int } func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { @@ -2662,8 +2668,14 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn if len(grampusType) > 0 { if grampusType[0] == GPU { dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID - } else { + } else if grampusType[0] == NPU { dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], 
attach.UUID[1:2]) + "/" + attach.UUID + "/" + } else if grampusType[0] == GCU { + if attach.Type == TypeCloudBrainOne { + dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + } else { + dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } } } else { @@ -2679,6 +2691,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn Name: fileName, FullName: attach.Name, Size: int(attach.Size), + Type: attach.Type, } if i == 0 { datasetNames = attach.Name diff --git a/models/task_config.go b/models/task_config.go index f86032fc9..14ca6b223 100644 --- a/models/task_config.go +++ b/models/task_config.go @@ -39,6 +39,7 @@ func GetTaskTypeFromAction(a ActionType) TaskType { ActionCreateGrampusGPUDebugTask, ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, + ActionCreateGrampusGCUDebugTask, ActionCreateGrampusGPUTrainTask: return TaskCreateCloudbrainTask case ActionCreateRepo: diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 37e6fc1bf..b65b42bdd 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -19,6 +19,7 @@ const ( ProcessorTypeNPU = "npu.huawei.com/NPU" ProcessorTypeGPU = "nvidia.com/gpu" + ProcessorTypeGCU = "enflame-tech.com/gcu" GpuWorkDir = "/tmp/" NpuWorkDir = "/cache/" @@ -108,6 +109,7 @@ type GenerateNotebookJobReq struct { Spec *models.Specification CodeName string ModelPath string //参考启智GPU调试, 挂载/model目录用户的模型可以输出到这个目录 + ModelStorageType int } func getEndPoint() string { @@ -148,6 +150,36 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. 
} return datasetGrampus, command } +func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models.GrampusDataset, string) { + var datasetGrampus []models.GrampusDataset + var command = "" + obsEndPoint := getEndPoint() + for uuid, datasetInfo := range datasetInfos { + if datasetInfo.Type == models.TypeCloudBrainOne { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: datasetInfo.DataLocalPath, + ReadOnly: true, + ContainerPath: "/dataset1/" + datasetInfo.Name, + }) + + command += "cp /dataset1/'" + datasetInfo.Name + "'/" + uuid + " /dataset/'" + datasetInfo.FullName + "';" + + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Bucket, + EndPoint: obsEndPoint, + ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, + ContainerPath: "/dataset/" + datasetInfo.Name, + }) + } + + } + return datasetGrampus, command +} func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() @@ -178,25 +210,45 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job imageUrl = "" req.Command = "" } else { - datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + if ProcessorTypeGCU == req.ProcessType { + datasetGrampus, cpCommand = getDatasetGCUGrampus(req.DatasetInfos) + } else { + datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + } if len(req.ModelName) != 0 { - datasetGrampus = append(datasetGrampus, models.GrampusDataset{ - Name: req.ModelName, - Bucket: setting.Attachment.Minio.Bucket, - EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.PreTrainModelPath, - ReadOnly: true, - ContainerPath: cloudbrain.PretrainModelMountPath, - }) + if req.ModelStorageType == models.TypeCloudBrainOne { + 
datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: req.PreTrainModelPath, + ReadOnly: true, + ContainerPath: cloudbrain.PretrainModelMountPath, + }) + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Bucket, + EndPoint: getEndPoint(), + ReadOnly: true, + ObjectKey: req.PreTrainModelPath, + ContainerPath: cloudbrain.PretrainModelMountPath, + }) + } + } + codeArchiveName := cloudbrain.DefaultBranchName + ".zip" codeGrampus = models.GrampusDataset{ Name: req.CodeName, Bucket: setting.Attachment.Minio.Bucket, EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", + ObjectKey: req.CodeStoragePath + codeArchiveName, ReadOnly: false, ContainerPath: cloudbrain.CodeMountPath, } + if ProcessorTypeGCU == req.ProcessType { + imageUrl = "" + } req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) log.Info("debug command:" + req.Command) @@ -215,6 +267,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, Command: req.Command, + CenterID: req.Spec.GetAvailableCenterIds(ctx.User.ID), }, }, }) @@ -263,6 +316,8 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job actionType = models.ActionCreateGrampusNPUDebugTask } else if req.ComputeResource == models.GPUResource { actionType = models.ActionCreateGrampusGPUDebugTask + } else if req.ComputeResource == models.GCUResource { + actionType = models.ActionCreateGrampusGCUDebugTask } task, err := models.GetCloudbrainByJobID(jobID) if err != nil { diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 
59c8f100b..411aa8ee7 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -3135,6 +3135,7 @@ task_gpudebugjob=`created CPU/GPU type debugging task %s` task_c2net_gpudebugjob=`created CPU/GPU type debugging task %s` task_c2net_npudebugjob=`created NPU type debugging task %s` +task_c2ent_gcudebugjob=`created GCU type debugging task %s` task_nputrainjob=`created NPU training task %s` task_inferencejob=`created reasoning task %s` task_benchmark=`created profiling task %s` diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 3793c7382..5a400fd91 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -3154,6 +3154,7 @@ task_gpudebugjob=`创建了CPU/GPU类型调试任务 task_npudebugjob=`创建了NPU类型调试任务 %s` task_c2net_gpudebugjob=`创建了CPU/GPU类型调试任务 %s` task_c2net_npudebugjob=`创建了NPU类型调试任务 %s` +task_c2ent_gcudebugjob=`创建了GCU类型调试任务 %s` task_nputrainjob=`创建了NPU类型训练任务 %s` task_inferencejob=`创建了推理任务 %s` task_benchmark=`创建了评测任务 %s` diff --git a/public/home/home.js b/public/home/home.js index fe843161e..2c70ca8ee 100755 --- a/public/home/home.js +++ b/public/home/home.js @@ -243,11 +243,12 @@ document.onreadystatechange = function () { html += recordPrefix + actionName; html += " " + getRepotext(record) + "" } - else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){ + else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" + || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } - else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40"){ + else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" 
|| record.OpType == "41"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } @@ -294,7 +295,7 @@ function getTaskLink(record){ re = re + "/cloudbrain/train-job/" + record.Content; }else if(record.OpType == 32 || record.OpType == 33){ re = re + "/grampus/train-job/" + record.Content; - }else if(record.OpType == 39 || record.OpType == 40){ + }else if(record.OpType == 39 || record.OpType == 40 || record.OpType == 41){ re = re + "/grampus/notebook/" + record.Content; } @@ -453,9 +454,10 @@ var actionNameZH={ "33":"创建了CPU/GPU类型训练任务", "35":"创建的数据集 {dataset} 被设置为推荐数据集", "36":"提交了镜像 {image}", - "37": "提交的镜像 {image} 被设置为推荐镜像", + "37":"提交的镜像 {image} 被设置为推荐镜像", "39":"创建了CPU/GPU类型调试任务", "40":"创建了NPU类型调试任务", + "41":"创建了GCU类型调试任务", }; var actionNameEN={ @@ -486,9 +488,10 @@ var actionNameEN={ "33":" created CPU/GPU type training task", "35":" created dataset {dataset} was set as recommended dataset", "36":"committed image {image}", - "37": "committed image {image} was set as recommended image", + "37":"committed image {image} was set as recommended image", "39":" created CPU/GPU type debugging task ", "40":" created NPU type debugging task ", + "41":" created GCU type debugging task ", }; var repoAndOrgZH={ diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index a074119fc..e67568394 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -76,7 +76,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio cloudType := aiTask.Type modelSelectedFile := ctx.Query("modelSelectedFile") //download model zip //train type - if aiTask.ComputeResource == models.NPUResource { + if aiTask.ComputeResource == models.NPUResource || aiTask.ComputeResource == models.GCUResource { cloudType = models.TypeCloudBrainTwo } else if aiTask.ComputeResource == models.GPUResource { cloudType = models.TypeCloudBrainOne diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 
c9c2e7403..762c9e706 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -54,6 +54,8 @@ const ( //NPU tplGrampusNotebookNPUNew base.TplName = "repo/grampus/notebook/npu/new" tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new" + //GCU + tplGrampusNotebookGCUNew base.TplName = "repo/grampus/notebook/gcu/new" ) func GrampusNotebookNew(ctx *context.Context) { @@ -62,6 +64,8 @@ func GrampusNotebookNew(ctx *context.Context) { processType := grampus.ProcessorTypeGPU if notebookType == 1 { processType = grampus.ProcessorTypeNPU + } else if notebookType == 2 { + processType = grampus.ProcessorTypeGCU } err := grampusNotebookNewDataPrepare(ctx, processType) if err != nil { @@ -70,8 +74,10 @@ func GrampusNotebookNew(ctx *context.Context) { } if processType == grampus.ProcessorTypeGPU { ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew) - } else { + } else if processType == grampus.ProcessorTypeNPU { ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew) + } else if processType == grampus.ProcessorTypeGCU { + ctx.HTML(http.StatusOK, tplGrampusNotebookGCUNew) } } @@ -118,6 +124,12 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook computeSource = models.NPUResource computeSourceSimple = models.NPU codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath + } else if form.Type == 2 { + tpl = tplGrampusNotebookGCUNew + processType = grampus.ProcessorTypeGCU + computeSource = models.GCUResource + computeSourceSimple = models.GCU + codeStoragePath = setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" } limiterCtx := &lock.LockContext{Repo: ctx.Repo.Repository, DisplayJobName: displayJobName, User: ctx.User} @@ -221,7 +233,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook return } - if processType == grampus.ProcessorTypeGPU { + if processType == grampus.ProcessorTypeGPU || processType == grampus.ProcessorTypeGCU { if err := uploadCodeToMinio(codeLocalPath+"/", 
jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusNotebookNewDataPrepare(ctx, processType) @@ -261,7 +273,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook if form.ModelName != "" { //使用预训练模型训练 - _, err := models.QueryModelByPath(form.PreTrainModelUrl) + m, err := models.QueryModelByPath(form.PreTrainModelUrl) if err != nil { log.Error("Can not find model", err) grampusNotebookNewDataPrepare(ctx, processType) @@ -274,7 +286,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook req.ModelVersion = form.ModelVersion req.PreTrainModelUrl = form.PreTrainModelUrl req.PreTrainModelPath = getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) - + req.ModelStorageType = m.Type } _, err = grampus.GenerateNotebookJob(ctx, req) @@ -293,7 +305,7 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["display_job_name"] = displayJobName //get valid images - if processType == grampus.ProcessorTypeNPU { + if processType == grampus.ProcessorTypeNPU || processType == grampus.ProcessorTypeGCU { images, err := grampus.GetImages(processType, string(models.JobTypeDebug)) if err != nil { log.Error("GetImages failed:", err.Error()) @@ -309,6 +321,10 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err computeResourceSimple = models.NPU datasetType = models.TypeCloudBrainTwo computeResource = models.NPUResource + } else if processType == grampus.ProcessorTypeGCU { + computeResourceSimple = models.GCU + datasetType = models.TypeCloudBrainAll + computeResource = models.GCUResource } prepareGrampusSpecs(ctx, computeResourceSimple, models.JobTypeDebug) @@ -1643,7 +1659,11 @@ func GrampusNotebookRestart(ctx *context.Context) { if task.ComputeResource == models.NPUResource { computeSourceSimple = models.NPU action = models.ActionCreateGrampusNPUDebugTask + } 
else if task.ComputeResource == models.GCUResource { + computeSourceSimple = models.GCU + action = models.ActionCreateGrampusGCUDebugTask } + spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{ JobType: models.JobType(task.JobType), ComputeResource: computeSourceSimple, @@ -1659,7 +1679,7 @@ func GrampusNotebookRestart(ctx *context.Context) { errorMsg = ctx.Tr("points.insufficient_points_balance") break } - if task.IsGPUTask() { + if task.IsGPUTask() || task.IsGCUTask() { if _, err := os.Stat(getOldJobPath(task)); err != nil { log.Error("Can not find job minio path", err) resultCode = "-1" diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index 172fa1502..0010164ae 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -72,6 +72,11 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s JobType: []models.JobType{models.JobTypeDebug}, NotFinalStatuses: GrampusNotFinalStatuses, ComputeResource: models.NPUResource, +}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GCUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: GrampusNotFinalStatuses, + ComputeResource: models.GCUResource, }} func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go index 7bac92ab8..2a752acf5 100755 --- a/services/socketwrap/clientManager.go +++ b/services/socketwrap/clientManager.go @@ -10,7 +10,7 @@ import ( "github.com/elliotchance/orderedmap" ) -var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40} +var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 35, 39, 40, 41} type ClientsManager struct { Clients *orderedmap.OrderedMap diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl index f6d20216a..64665ca3c 100755 --- a/templates/admin/cloudbrain/list.tmpl +++ b/templates/admin/cloudbrain/list.tmpl @@ -96,7 +96,7 @@ {{end}}