diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 4741885a1..0d35cc326 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1073,6 +1073,7 @@ cloudbrain_operate = Operate cloudbrain_status_createtime = Status/Createtime cloudbrain_status_runtime = Running Time cloudbrain_jobname_err=Name must start with a lowercase letter or number,can include lowercase letter,number,_ and -,can not end with _, and can be up to 36 characters long. +cloudbrain_bootfile_err=The bootfile does not exist in the repository cloudbrain_query_fail=Failed to query cloudbrain information. cloudbrain.mirror_tag = Mirror Tag cloudbrain.mirror_description = Mirror Description diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index c330dfad9..d34a18cc1 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -1076,6 +1076,7 @@ cloudbrain_operate=操作 cloudbrain_status_createtime=状态/创建时间 cloudbrain_status_runtime = 运行时长 cloudbrain_jobname_err=只能以小写字母或数字开头且只包含小写字母、数字、_和-,不能以_结尾,最长36个字符。 +cloudbrain_bootfile_err=仓库中不存在启动文件 cloudbrain_query_fail=查询云脑任务失败。 cloudbrain.mirror_tag = 镜像标签 cloudbrain.mirror_description = 镜像描述 diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 76bf9b076..1f33f7023 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -239,6 +239,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath resourceSpecId := form.ResourceSpecId branchName := form.BranchName + bootFile := strings.TrimSpace(form.BootFile) repo := ctx.Repo.Repository tpl := tplCloudBrainNew @@ -305,6 +306,13 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { command := cloudbrain.GetCloudbrainDebugCommand() if jobType == string(models.JobTypeTrain) { + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) + return + } tpl = tplCloudBrainTrainJobNew commandTrain, err := getTrainJobCommand(form) if err != nil { @@ -413,6 +421,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath resourceSpecId := form.ResourceSpecId branchName := form.BranchName + bootFile := strings.TrimSpace(form.BootFile) labelName := form.LabelName repo := ctx.Repo.Repository @@ -450,6 +459,14 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra return } + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) + return + } + count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 10869537a..ccad2678a 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -215,6 +215,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain return } + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobGPUNew, &form) + return + } + errStr := checkSpecialPool(ctx, "GPU") if errStr != "" { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) @@ -399,6 +407,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain return } + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobNPUNew, &form) + return + } + errStr := checkSpecialPool(ctx, "NPU") if errStr != "" { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 948a0e751..92236d235 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1089,7 +1089,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" // dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" - branch_name := form.BranchName + branchName := form.BranchName isLatestVersion := modelarts.IsLatestVersion FlavorName := form.FlavorName VersionCount := modelarts.VersionCount @@ -1117,6 +1117,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err) + trainJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobNew, &form) + return + } + errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain)) if errStr != "" { trainJobErrorNewDataPrepare(ctx, form) @@ -1148,9 +1156,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } gitRepo, _ := git.OpenRepository(repo.RepoPath()) - commitID, _ := gitRepo.GetBranchCommitID(branch_name) + commitID, _ := gitRepo.GetBranchCommitID(branchName) - if err := downloadCode(repo, codeLocalPath, branch_name); err != nil { + if err := downloadCode(repo, codeLocalPath, branchName); err != nil { log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err) trainJobErrorNewDataPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobNew, &form) @@ -1292,7 +1300,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) Parameters: param, CommitID: commitID, IsLatestVersion: isLatestVersion, - BranchName: branch_name, + BranchName: branchName, Params: form.Params, FlavorName: FlavorName, EngineName: EngineName, @@ -1394,7 +1402,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" // dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" - branch_name := form.BranchName + branchName := form.BranchName PreVersionName := form.VersionName FlavorName := form.FlavorName EngineName := form.EngineName @@ -1414,6 +1422,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ return } + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err) + versionErrorDataPrepare(ctx, form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobVersionNew, &form) + return + } + errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain)) if errStr != "" { versionErrorDataPrepare(ctx, form) @@ -1428,8 +1444,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ } gitRepo, _ := git.OpenRepository(repo.RepoPath()) - commitID, _ := gitRepo.GetBranchCommitID(branch_name) - if err := downloadCode(repo, codeLocalPath, branch_name); err != nil { + commitID, _ := gitRepo.GetBranchCommitID(branchName) + if err := downloadCode(repo, codeLocalPath, branchName); err != nil { log.Error("Failed git clone repo to local(!: %s (%v)", repo.FullName(), err) versionErrorDataPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobVersionNew, &form) @@ -1582,7 +1598,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ Parameters: param, PreVersionId: task.VersionID, CommitID: commitID, - BranchName: branch_name, + BranchName: branchName, FlavorName: FlavorName, EngineName: EngineName, PreVersionName: PreVersionName, @@ -2025,7 +2041,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" - branch_name := form.BranchName + branchName := form.BranchName FlavorName := form.FlavorName EngineName := form.EngineName LabelName := form.LabelName @@ -2060,6 +2076,14 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference return } + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsInferenceJobNew, &form) + return + } + //Determine whether the task name of the task in the project is duplicated tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeInference), displayJobName) if err == nil { @@ -2092,9 +2116,9 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference } gitRepo, _ := git.OpenRepository(repo.RepoPath()) - commitID, _ := gitRepo.GetBranchCommitID(branch_name) + commitID, _ := gitRepo.GetBranchCommitID(branchName) - if err := downloadCode(repo, codeLocalPath, branch_name); err != nil { + if err := downloadCode(repo, codeLocalPath, branchName); err != nil { log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err) inferenceJobErrorNewDataPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsInferenceJobNew, &form) @@ -2178,7 +2202,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference Uuid: uuid, Parameters: param, //modelarts train parameters CommitID: commitID, - BranchName: branch_name, + BranchName: branchName, Params: form.Params, FlavorName: FlavorName, EngineName: EngineName,