diff --git a/models/action.go b/models/action.go index 869acb762..6d93fbcd3 100755 --- a/models/action.go +++ b/models/action.go @@ -67,6 +67,7 @@ const ( ActionChangeUserAvatar //38 ActionCreateGrampusNPUDebugTask //39 ActionCreateGrampusGPUDebugTask //40 + ActionCreateGrampusGCUDebugTask //41 ) // Action represents user operation type and other information to @@ -380,7 +381,8 @@ func (a *Action) IsCloudbrainAction() bool { ActionCreateGrampusGPUDebugTask, ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, - ActionCreateGrampusGPUTrainTask: + ActionCreateGrampusGPUTrainTask, + ActionCreateGrampusGCUDebugTask: return true } return false diff --git a/models/cloudbrain.go b/models/cloudbrain.go index a0007c47e..e0df17753 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -36,6 +36,7 @@ const ( const ( NPUResource = "NPU" GPUResource = "CPU/GPU" + GCUResource = "GCU" AllResource = "all" //notebook storage category @@ -136,6 +137,11 @@ const ( //ComputeResource GPU = "GPU" NPU = "NPU" + GCU = "GCU" +) + +const ( + AIModelPath = "aimodels/" ) type Cloudbrain struct { @@ -304,6 +310,9 @@ func (task *Cloudbrain) IsUserHasRight(user *User) bool { func (task *Cloudbrain) IsGPUTask() bool { return task.ComputeResource == GPUResource } +func (task *Cloudbrain) IsGCUTask() bool { + return task.ComputeResource == GCUResource +} func (task *Cloudbrain) IsNPUTask() bool { return task.ComputeResource == NPUResource } @@ -2654,6 +2663,7 @@ type DatasetInfo struct { DataLocalPath string Name string FullName string + Type int Size int } @@ -2694,8 +2704,14 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn if len(grampusType) > 0 { if grampusType[0] == GPU { dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID - } else { + } else if grampusType[0] == NPU { dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + 
"/" + } else if grampusType[0] == GCU { + if attach.Type == TypeCloudBrainOne { + dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + } else { + dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } } } else { @@ -2710,6 +2726,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn DataLocalPath: dataLocalPath, Name: fileName, FullName: attach.Name, + Type: attach.Type, Size: int(attach.Size), } if i == 0 { diff --git a/models/task_config.go b/models/task_config.go index f86032fc9..14ca6b223 100644 --- a/models/task_config.go +++ b/models/task_config.go @@ -39,6 +39,7 @@ func GetTaskTypeFromAction(a ActionType) TaskType { ActionCreateGrampusGPUDebugTask, ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, + ActionCreateGrampusGCUDebugTask, ActionCreateGrampusGPUTrainTask: return TaskCreateCloudbrainTask case ActionCreateRepo: diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 37e6fc1bf..b65b42bdd 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -19,6 +19,7 @@ const ( ProcessorTypeNPU = "npu.huawei.com/NPU" ProcessorTypeGPU = "nvidia.com/gpu" + ProcessorTypeGCU = "enflame-tech.com/gcu" GpuWorkDir = "/tmp/" NpuWorkDir = "/cache/" @@ -108,6 +109,7 @@ type GenerateNotebookJobReq struct { Spec *models.Specification CodeName string ModelPath string //参考启智GPU调试, 挂载/model目录用户的模型可以输出到这个目录 + ModelStorageType int } func getEndPoint() string { @@ -148,6 +150,36 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. 
} return datasetGrampus, command } +func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models.GrampusDataset, string) { + var datasetGrampus []models.GrampusDataset + var command = "" + obsEndPoint := getEndPoint() + for uuid, datasetInfo := range datasetInfos { + if datasetInfo.Type == models.TypeCloudBrainOne { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: datasetInfo.DataLocalPath, + ReadOnly: true, + ContainerPath: "/dataset1/" + datasetInfo.Name, + }) + + command += "cp /dataset1/'" + datasetInfo.Name + "'/" + uuid + " /dataset/'" + datasetInfo.FullName + "';" + + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Bucket, + EndPoint: obsEndPoint, + ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, + ContainerPath: "/dataset/" + datasetInfo.Name, + }) + } + + } + return datasetGrampus, command +} func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() @@ -178,25 +210,45 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job imageUrl = "" req.Command = "" } else { - datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + if ProcessorTypeGCU == req.ProcessType { + datasetGrampus, cpCommand = getDatasetGCUGrampus(req.DatasetInfos) + } else { + datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + } if len(req.ModelName) != 0 { - datasetGrampus = append(datasetGrampus, models.GrampusDataset{ - Name: req.ModelName, - Bucket: setting.Attachment.Minio.Bucket, - EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.PreTrainModelPath, - ReadOnly: true, - ContainerPath: cloudbrain.PretrainModelMountPath, - }) + if req.ModelStorageType == models.TypeCloudBrainOne { + 
datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: req.PreTrainModelPath, + ReadOnly: true, + ContainerPath: cloudbrain.PretrainModelMountPath, + }) + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Bucket, + EndPoint: getEndPoint(), + ReadOnly: true, + ObjectKey: req.PreTrainModelPath, + ContainerPath: cloudbrain.PretrainModelMountPath, + }) + } + } + codeArchiveName := cloudbrain.DefaultBranchName + ".zip" codeGrampus = models.GrampusDataset{ Name: req.CodeName, Bucket: setting.Attachment.Minio.Bucket, EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", + ObjectKey: req.CodeStoragePath + codeArchiveName, ReadOnly: false, ContainerPath: cloudbrain.CodeMountPath, } + if ProcessorTypeGCU == req.ProcessType { + imageUrl = "" + } req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) log.Info("debug command:" + req.Command) @@ -215,6 +267,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, Command: req.Command, + CenterID: req.Spec.GetAvailableCenterIds(ctx.User.ID), }, }, }) @@ -263,6 +316,8 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job actionType = models.ActionCreateGrampusNPUDebugTask } else if req.ComputeResource == models.GPUResource { actionType = models.ActionCreateGrampusGPUDebugTask + } else if req.ComputeResource == models.GCUResource { + actionType = models.ActionCreateGrampusGCUDebugTask } task, err := models.GetCloudbrainByJobID(jobID) if err != nil { diff --git a/modules/storage/minio_ext.go b/modules/storage/minio_ext.go index 
d4a8abba5..6aeb89aab 100755 --- a/modules/storage/minio_ext.go +++ b/modules/storage/minio_ext.go @@ -391,3 +391,19 @@ func GetPartInfos(objectName string, uploadID string) (string, error) { return chunks, nil } + +func IsObjectExist4Minio(bucket, objectName string) (bool, error) { + _, core, err := getClients() + if err != nil { + log.Error("getClients failed: %v", err) + return false, err + } + + _, err = core.StatObject(bucket, objectName, miniov6.StatObjectOptions{}) + if err != nil { + log.Error("StatObject error.%v", err) + return false, err + } + + return true, nil +} diff --git a/modules/storage/obs.go b/modules/storage/obs.go index cc621cc3c..d00d000b5 100755 --- a/modules/storage/obs.go +++ b/modules/storage/obs.go @@ -645,3 +645,16 @@ func GetObsLogFileName(prefix string) ([]FileInfo, error) { } return fileInfos, nil } + +func IsObjectExist4Obs(bucket, key string) (bool, error) { + + _, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{ + Bucket: bucket, + Key: key, + }) + if err != nil { + log.Error("GetObjectMetadata error.%v", err) + return false, err + } + return true, nil +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index f04b67903..5fc02c609 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1099,8 +1099,8 @@ image_delete_fail=Failed to delete image, please try again later. image_overwrite=You had submitted the same name image before, are you sure to overwrite the original image? 
download=Download score=Score -wait_count_start = Your current queue position is -wait_count_end = +wait_count_start = Your current queue position is +wait_count_end = file_limit_100 = Display up to 100 files or folders in a single directory images.name = Image Tag images.name_placerholder = Please enter the image name @@ -1360,6 +1360,7 @@ modelconvert.inputshapeerror=Format input error, please input such as: 1,1,32,32 modelconvert.manage.create_error1=A model transformation task with the same name already exists. modelconvert.manage.create_error2=Only one running model transformation task can be created. modelconvert.manage.model_not_exist=The model in the task does not exist or has been deleted. +modelconvert.manage.model_file_not_exist=The model file in the task does not exist or has been deleted. modelconvert.manage.no_operate_right=You have no right to do the operation. debug.manage.model_not_exist=The model in the task does not exist or has been deleted, please create a new debug job. @@ -3146,6 +3147,7 @@ task_gpudebugjob=`created CPU/GPU type debugging task %s` task_c2net_gpudebugjob=`created CPU/GPU type debugging task %s` task_c2net_npudebugjob=`created NPU type debugging task %s` +task_c2ent_gcudebugjob=`created GCU type debugging task %s` task_nputrainjob=`created NPU training task %s` task_inferencejob=`created reasoning task %s` task_benchmark=`created profiling task %s` @@ -3321,6 +3323,7 @@ Stopped_success_update_status_fail=Succeed in stopping th job, but failed to upd load_code_failed=Fail to load code, please check if the right branch is selected. 
error.dataset_select = dataset select error:the count exceed the limit or has same name +error.partial_datasets_not_available = There are non-existent or deleted files in the selected dataset file, please select again new_train_gpu_tooltips = The code is storaged in %s, the dataset is storaged in %s, the pre-trained model is storaged in the run parameter %s, and please put your model into %s then you can download it online new_debug_gpu_tooltips = The code is storaged in %s, the dataset is storaged in %s, the pre-trained model is storaged in the %s, and please put your model into %s then you can download it online new_debug_gpu_tooltips1 = The code is storaged in %s, the dataset is storaged in %s, the pre-trained model is storaged in the %s. diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index de7fea353..4dc2e4c89 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -1085,7 +1085,7 @@ delete=删除 more=更多 gpu_type_all=全部 model_download=结果下载 -all_result_download=全部结果下载 +all_result_download=全部结果下载 submit_image=提交镜像 modify_image=修改镜像 image_exist=镜像Tag已被使用,请修改镜像Tag。 @@ -1374,6 +1374,7 @@ modelconvert.modelfileempty=请选择模型文件。 modelconvert.manage.create_error1=相同的名称模型转换任务已经存在。 modelconvert.manage.create_error2=只能创建一个正在运行的模型转换任务。 modelconvert.manage.model_not_exist=任务中选择的模型不存在或者已被删除。 +modelconvert.manage.model_file_not_exist=任务中选择的模型文件不存在或者已被删除。 modelconvert.manage.no_operate_right=您没有操作权限。 @@ -3165,6 +3166,7 @@ task_gpudebugjob=`创建了CPU/GPU类型调试任务 task_npudebugjob=`创建了NPU类型调试任务 %s` task_c2net_gpudebugjob=`创建了CPU/GPU类型调试任务 %s` task_c2net_npudebugjob=`创建了NPU类型调试任务 %s` +task_c2ent_gcudebugjob=`创建了GCU类型调试任务 %s` task_nputrainjob=`创建了NPU类型训练任务 %s` task_inferencejob=`创建了推理任务 %s` task_benchmark=`创建了评测任务 %s` @@ -3343,6 +3345,7 @@ load_code_failed=代码加载失败,请确认选择了正确的分支。 error.debug_datasetsize = 数据集大小超过限制('%d'GB) error.dataset_select = 数据集选择错误:数量超过限制或者有同名数据集 +error.partial_datasets_not_available = 选择的数据集文件中有不存在或已删除的文件,请重新选择 
new_train_gpu_tooltips = 训练脚本存储在 %s 中,数据集存储在 %s 中,预训练模型存放在运行参数 %s 中,训练输出请存储在 %s 中以供后续下载。 new_debug_gpu_tooltips = 项目代码存储在 %s 中,数据集存储在 %s 中,选择的模型存储在 %s 中,调试输出请存储在 %s 中以供后续下载。 new_debug_gpu_tooltips1 = 项目代码存储在 %s 中,数据集存储在 %s 中,选择的模型存储在 %s 中。 diff --git a/public/home/home.js b/public/home/home.js index fe843161e..2c70ca8ee 100755 --- a/public/home/home.js +++ b/public/home/home.js @@ -243,11 +243,12 @@ document.onreadystatechange = function () { html += recordPrefix + actionName; html += " " + getRepotext(record) + "" } - else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){ + else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" + || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } - else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40"){ + else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } @@ -294,7 +295,7 @@ function getTaskLink(record){ re = re + "/cloudbrain/train-job/" + record.Content; }else if(record.OpType == 32 || record.OpType == 33){ re = re + "/grampus/train-job/" + record.Content; - }else if(record.OpType == 39 || record.OpType == 40){ + }else if(record.OpType == 39 || record.OpType == 40 || record.OpType == 41){ re = re + "/grampus/notebook/" + record.Content; } @@ -453,9 +454,10 @@ var actionNameZH={ "33":"创建了CPU/GPU类型训练任务", "35":"创建的数据集 {dataset} 被设置为推荐数据集", "36":"提交了镜像 {image}", - "37": "提交的镜像 {image} 被设置为推荐镜像", + "37":"提交的镜像 {image} 被设置为推荐镜像", "39":"创建了CPU/GPU类型调试任务", "40":"创建了NPU类型调试任务", + "41":"创建了GCU类型调试任务", }; var 
actionNameEN={ @@ -486,9 +488,10 @@ var actionNameEN={ "33":" created CPU/GPU type training task", "35":" created dataset {dataset} was set as recommended dataset", "36":"committed image {image}", - "37": "committed image {image} was set as recommended image", + "37":"committed image {image} was set as recommended image", "39":" created CPU/GPU type debugging task ", "40":" created NPU type debugging task ", + "41":" created GCU type debugging task ", }; var repoAndOrgZH={ diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index a074119fc..e67568394 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -76,7 +76,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio cloudType := aiTask.Type modelSelectedFile := ctx.Query("modelSelectedFile") //download model zip //train type - if aiTask.ComputeResource == models.NPUResource { + if aiTask.ComputeResource == models.NPUResource || aiTask.ComputeResource == models.GCUResource { cloudType = models.TypeCloudBrainTwo } else if aiTask.ComputeResource == models.GPUResource { cloudType = models.TypeCloudBrainOne diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index d33fce106..05f857d86 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -53,6 +53,8 @@ const ( //NPU tplGrampusNotebookNPUNew base.TplName = "repo/grampus/notebook/npu/new" tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new" + //GCU + tplGrampusNotebookGCUNew base.TplName = "repo/grampus/notebook/gcu/new" ) func GrampusNotebookNew(ctx *context.Context) { @@ -61,6 +63,8 @@ func GrampusNotebookNew(ctx *context.Context) { processType := grampus.ProcessorTypeGPU if notebookType == 1 { processType = grampus.ProcessorTypeNPU + } else if notebookType == 2 { + processType = grampus.ProcessorTypeGCU } err := grampusNotebookNewDataPrepare(ctx, processType) if err != nil { @@ -69,8 +73,10 @@ func GrampusNotebookNew(ctx 
*context.Context) { } if processType == grampus.ProcessorTypeGPU { ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew) - } else { + } else if processType == grampus.ProcessorTypeNPU { ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew) + } else if processType == grampus.ProcessorTypeGCU { + ctx.HTML(http.StatusOK, tplGrampusNotebookGCUNew) } } @@ -117,6 +123,12 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook computeSource = models.NPUResource computeSourceSimple = models.NPU codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath + } else if form.Type == 2 { + tpl = tplGrampusNotebookGCUNew + processType = grampus.ProcessorTypeGCU + computeSource = models.GCUResource + computeSourceSimple = models.GCU + codeStoragePath = setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" } lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) @@ -199,6 +211,12 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } + uuidArray := strings.Split(uuid, ";") + if datasetInfos == nil || len(datasetInfos) < len(uuidArray) { + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.partial_datasets_not_available"), tpl, &form) + return + } } //prepare code and out path @@ -215,7 +233,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook return } - if processType == grampus.ProcessorTypeGPU { + if processType == grampus.ProcessorTypeGPU || processType == grampus.ProcessorTypeGCU { if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusNotebookNewDataPrepare(ctx, processType) @@ -255,20 +273,26 @@ func GrampusNotebookCreate(ctx 
*context.Context, form auth.CreateGrampusNotebook if form.ModelName != "" { //使用预训练模型训练 - _, err := models.QueryModelByPath(form.PreTrainModelUrl) + m, err := models.QueryModelByPath(form.PreTrainModelUrl) if err != nil { log.Error("Can not find model", err) grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr(ctx.Tr("repo.modelconvert.manage.model_not_exist"), tpl, &form) return } + if !cloudbrainTask.IsModelFileExists(m, form.CkptName) { + log.Error("model file not exist.name = %s", form.CkptName) + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("repo.modelconvert.manage.model_file_not_exist"), tpl, &form) + return + } req.ModelName = form.ModelName req.LabelName = form.LabelName req.CkptName = form.CkptName req.ModelVersion = form.ModelVersion req.PreTrainModelUrl = form.PreTrainModelUrl req.PreTrainModelPath = getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) - + req.ModelStorageType = m.Type } _, err = grampus.GenerateNotebookJob(ctx, req) @@ -287,7 +311,7 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["display_job_name"] = displayJobName //get valid images - if processType == grampus.ProcessorTypeNPU { + if processType == grampus.ProcessorTypeNPU || processType == grampus.ProcessorTypeGCU { images, err := grampus.GetImages(processType, string(models.JobTypeDebug)) if err != nil { log.Error("GetImages failed:", err.Error()) @@ -303,6 +327,10 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err computeResourceSimple = models.NPU datasetType = models.TypeCloudBrainTwo computeResource = models.NPUResource + } else if processType == grampus.ProcessorTypeGCU { + computeResourceSimple = models.GCU + datasetType = models.TypeCloudBrainAll + computeResource = models.GCUResource } prepareGrampusSpecs(ctx, computeResourceSimple, models.JobTypeDebug) @@ -1308,7 +1336,7 @@ func GrampusTrainJobShow(ctx *context.Context) { taskList := 
make([]*models.Cloudbrain, 0) taskList = append(taskList, task) prepareSpec4Show(ctx, task) - + ctx.Data["version_list_task"] = taskList ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) @@ -1654,7 +1682,11 @@ func GrampusNotebookRestart(ctx *context.Context) { if task.ComputeResource == models.NPUResource { computeSourceSimple = models.NPU action = models.ActionCreateGrampusNPUDebugTask + } else if task.ComputeResource == models.GCUResource { + computeSourceSimple = models.GCU + action = models.ActionCreateGrampusGCUDebugTask } + spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{ JobType: models.JobType(task.JobType), ComputeResource: computeSourceSimple, @@ -1670,7 +1702,7 @@ func GrampusNotebookRestart(ctx *context.Context) { errorMsg = ctx.Tr("points.insufficient_points_balance") break } - if task.IsGPUTask() { + if task.IsGPUTask() || task.IsGCUTask() { if _, err := os.Stat(getOldJobPath(task)); err != nil { log.Error("Can not find job minio path", err) resultCode = "-1" diff --git a/services/cloudbrain/cloudbrainTask/ai_model.go b/services/cloudbrain/cloudbrainTask/ai_model.go new file mode 100644 index 000000000..02cc392be --- /dev/null +++ b/services/cloudbrain/cloudbrainTask/ai_model.go @@ -0,0 +1,30 @@ +package cloudbrainTask + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/storage" +) + +func IsModelFileExists(model *models.AiModelManage, fileName string) bool { + if model.Type == models.TypeCloudBrainTwo { + key := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" + fileName + log.Info("IsModelFileExists TypeCloudBrainTwo key=%s", key) + isExist, err := storage.IsObjectExist4Obs(setting.Bucket, key) + if err != nil { + return false + } + return isExist + } else if model.Type == models.TypeCloudBrainOne 
{ + prefix := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" + objectName := prefix + fileName + log.Info("IsModelFileExists TypeCloudBrainOne objectName=%s", objectName) + isExist, err := storage.IsObjectExist4Minio(setting.Attachment.Minio.Bucket, objectName) + if err != nil { + return false + } + return isExist + } + return false +} diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index 172fa1502..0010164ae 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -72,6 +72,11 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s JobType: []models.JobType{models.JobTypeDebug}, NotFinalStatuses: GrampusNotFinalStatuses, ComputeResource: models.NPUResource, +}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GCUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: GrampusNotFinalStatuses, + ComputeResource: models.GCUResource, }} func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go index 7bac92ab8..2a752acf5 100755 --- a/services/socketwrap/clientManager.go +++ b/services/socketwrap/clientManager.go @@ -10,7 +10,7 @@ import ( "github.com/elliotchance/orderedmap" ) -var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40} +var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40, 41} type ClientsManager struct { Clients *orderedmap.OrderedMap diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl index 5040de2cc..91b0e913a 100755 --- a/templates/admin/cloudbrain/list.tmpl +++ 
b/templates/admin/cloudbrain/list.tmpl @@ -96,7 +96,7 @@ {{end}}
- {{if eq .JobType "DEBUG"}} + {{if eq .JobType "DEBUG"}} @@ -233,7 +233,7 @@ {{if eq .Status "RUNNING" "WAITING" "CREATING" "STARTING"}} {{$.i18n.Tr "repo.debug"}} diff --git a/templates/admin/cloudbrain/search.tmpl b/templates/admin/cloudbrain/search.tmpl index 1d4bb2f39..95332ad0f 100644 --- a/templates/admin/cloudbrain/search.tmpl +++ b/templates/admin/cloudbrain/search.tmpl @@ -45,6 +45,7 @@ {{.i18n.Tr "admin.cloudbrain.all_computing_resources"}} CPU/GPU NPU + GCU
@@ -119,7 +127,7 @@ {{.i18n.Tr "custom.resource_description"}} {{if .CloudBrainPaySwitch}} -
+
{{$.i18n.Tr "points.balance_of_points"}}{{.PointAccount.Balance}}{{$.i18n.Tr "points.points"}}{{$.i18n.Tr "points.expected_time"}}{{$.i18n.Tr "points.hours"}} @@ -147,7 +155,7 @@ {{template "base/footer" .}} - -