|
|
@@ -137,20 +137,45 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err |
|
|
|
ctx.Data["WaitCount"] = waitCount |
|
|
|
} |
|
|
|
|
|
|
|
if ctx.Cloudbrain != nil { |
|
|
|
ctx.Data["attachment"] = ctx.Cloudbrain.Uuid |
|
|
|
ctx.Data["boot_file"] = ctx.Cloudbrain.BootFile |
|
|
|
ctx.Data["image_id"] = ctx.Cloudbrain.ImageID |
|
|
|
ctx.Data["run_para_list"] = ctx.Cloudbrain.Parameters |
|
|
|
ctx.Data["description"] = ctx.Cloudbrain.Description |
|
|
|
ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName |
|
|
|
ctx.Data["engine_name"] = ctx.Cloudbrain.EngineName |
|
|
|
ctx.Data["WorkServerNumber"] = ctx.Cloudbrain.WorkServerNumber |
|
|
|
ctx.Data["image"] = ctx.Cloudbrain.Image |
|
|
|
ctx.Data["dataset_name"] = ctx.Cloudbrain.DatasetName |
|
|
|
ctx.Data["model_name"] = ctx.Cloudbrain.ModelName |
|
|
|
|
|
|
|
ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion |
|
|
|
ctx.Data["dataset_name"] = ctx.Cloudbrain.DatasetName |
|
|
|
ctx.Data["model_name"] = ctx.Cloudbrain.ModelName |
|
|
|
ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName |
|
|
|
ctx.Data["label_names"] = ctx.Cloudbrain.LabelName |
|
|
|
ctx.Data["PreTrainModelUrl"] = ctx.Cloudbrain.PreTrainModelUrl |
|
|
|
spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) |
|
|
|
if spec != nil { |
|
|
|
ctx.Data["spec_id"] = spec.ID |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func GrampusTrainJobVersionNew(ctx *context.Context) { |
|
|
|
task := ctx.Cloudbrain |
|
|
|
if task.ComputeResource == models.GPUResource { |
|
|
|
err := grampusTrainJobVersionNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("get new train-job version info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUVersionNew) |
|
|
|
} else if task.ComputeResource == models.NPUResource { |
|
|
|
err := grampusTrainJobVersionNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("get new train-job version info failed", err) |
|
|
|
return |
|
|
@@ -159,115 +184,6 @@ func GrampusTrainJobVersionNew(ctx *context.Context) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func grampusTrainJobVersionNewDataPrepare(ctx *context.Context, processType string) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
|
|
|
|
t := time.Now() |
|
|
|
var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["display_job_name"] = displayJobName |
|
|
|
|
|
|
|
//get valid images |
|
|
|
images, err := grampus.GetImages(processType) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetImages failed:", err.Error()) |
|
|
|
} else { |
|
|
|
ctx.Data["images"] = images.Infos |
|
|
|
} |
|
|
|
|
|
|
|
grampus.InitSpecialPool() |
|
|
|
|
|
|
|
ctx.Data["GPUEnabled"] = true |
|
|
|
ctx.Data["NPUEnabled"] = true |
|
|
|
includeCenters := make(map[string]struct{}) |
|
|
|
excludeCenters := make(map[string]struct{}) |
|
|
|
if grampus.SpecialPools != nil { |
|
|
|
for _, pool := range grampus.SpecialPools.Pools { |
|
|
|
if pool.IsExclusive { |
|
|
|
if !IsUserInOrgPool(ctx.User.ID, pool) { |
|
|
|
ctx.Data[pool.Type+"Enabled"] = false |
|
|
|
} |
|
|
|
} else { |
|
|
|
if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) { |
|
|
|
if IsUserInOrgPool(ctx.User.ID, pool) { |
|
|
|
for _, center := range pool.Pool { |
|
|
|
includeCenters[center.Queue] = struct{}{} |
|
|
|
} |
|
|
|
} else { |
|
|
|
for _, center := range pool.Pool { |
|
|
|
excludeCenters[center.Queue] = struct{}{} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//prepare available specs |
|
|
|
if processType == grampus.ProcessorTypeNPU { |
|
|
|
prepareGrampusTrainSpecs(ctx, models.NPU) |
|
|
|
} else if processType == grampus.ProcessorTypeGPU { |
|
|
|
prepareGrampusTrainSpecs(ctx, models.GPU) |
|
|
|
} |
|
|
|
|
|
|
|
//get branches |
|
|
|
branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetBranches error:", err.Error()) |
|
|
|
} else { |
|
|
|
ctx.Data["branches"] = branches |
|
|
|
} |
|
|
|
|
|
|
|
ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName |
|
|
|
ctx.Data["image_name"] = ctx.Cloudbrain.Image |
|
|
|
ctx.Data["image_id"] = ctx.Cloudbrain.ImageID |
|
|
|
ctx.Data["boot_file"] = ctx.Cloudbrain.BootFile |
|
|
|
ctx.Data["description"] = ctx.Cloudbrain.Description |
|
|
|
spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) |
|
|
|
if spec != nil { |
|
|
|
log.Info("spec_id = %d", spec.ID) |
|
|
|
ctx.Data["spec_id"] = spec.ID |
|
|
|
} |
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err = json.Unmarshal([]byte(ctx.Cloudbrain.Parameters), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["params"] = Parameters.Parameter |
|
|
|
|
|
|
|
_, _, datasetNames, _, err := getDatasUrlListByUUIDS(ctx.Cloudbrain.Uuid) |
|
|
|
if err != nil { |
|
|
|
log.Info("query dataset error," + err.Error()) |
|
|
|
ctx.Data["dataset_name"] = "" |
|
|
|
} else { |
|
|
|
ctx.Data["dataset_name"] = datasetNames |
|
|
|
} |
|
|
|
ctx.Data["uuid"] = ctx.Cloudbrain.Uuid |
|
|
|
ctx.Data["cloudbrain_type"] = models.C2NetCluster |
|
|
|
ctx.Data["compute_resource"] = ctx.Cloudbrain.ComputeResource |
|
|
|
|
|
|
|
if processType == grampus.ProcessorTypeGPU { |
|
|
|
ctx.Data["dataset_type"] = models.TypeCloudBrainOne |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) |
|
|
|
ctx.Data["wait_count"] = waitCount |
|
|
|
} else if processType == grampus.ProcessorTypeNPU { |
|
|
|
ctx.Data["dataset_type"] = models.TypeCloudBrainTwo |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain) |
|
|
|
ctx.Data["wait_count"] = waitCount |
|
|
|
ctx.Data["work_server_number"] = ctx.Cloudbrain.WorkServerNumber |
|
|
|
} |
|
|
|
|
|
|
|
ctx.Data["model_name"] = ctx.Cloudbrain.ModelName |
|
|
|
ctx.Data["label_name"] = ctx.Cloudbrain.LabelName |
|
|
|
ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName |
|
|
|
ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion |
|
|
|
ctx.Data["pre_train_model_url"] = ctx.Cloudbrain.PreTrainModelUrl |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func prepareGrampusTrainSpecs(ctx *context.Context, computeResource string) { |
|
|
|
noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ |
|
|
|
JobType: models.JobTypeTrain, |
|
|
@@ -343,10 +259,16 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
branchName := form.BranchName |
|
|
|
image := strings.TrimSpace(form.Image) |
|
|
|
tpl := tplGrampusTrainJobGPUNew |
|
|
|
|
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
if jobID != "" { |
|
|
|
tpl = tplGrampusTrainJobGPUVersionNew |
|
|
|
} |
|
|
|
|
|
|
|
if !jobNamePattern.MatchString(displayJobName) { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -354,14 +276,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil || !bootFileExist { |
|
|
|
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
errStr := checkSpecialPool(ctx, "GPU") |
|
|
|
if errStr != "" { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(errStr, tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -370,13 +292,13 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -385,7 +307,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := grampusParamCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -395,14 +317,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if len(tasks) != 0 { |
|
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} else { |
|
|
|
if !models.IsErrJobNotExist(err) { |
|
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -415,7 +337,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -425,7 +347,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -438,7 +360,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { |
|
|
|
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -447,7 +369,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -455,7 +377,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := mkModelPath(modelPath); err != nil { |
|
|
|
log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -463,7 +385,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -486,7 +408,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -528,7 +450,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
@@ -595,10 +517,17 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
isLatestVersion := modelarts.IsLatestVersion |
|
|
|
versionCount := modelarts.VersionCountOne |
|
|
|
engineName := form.EngineName |
|
|
|
tpl := tplGrampusTrainJobNPUNew |
|
|
|
|
|
|
|
//判断路由是否存在jobID,若存在,则说明是创建版本 |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
if jobID != "" { |
|
|
|
tpl = tplGrampusTrainJobNPUVersionNew |
|
|
|
} |
|
|
|
|
|
|
|
if !jobNamePattern.MatchString(displayJobName) { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -606,7 +535,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil || !bootFileExist { |
|
|
|
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -622,13 +551,13 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -637,7 +566,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := grampusParamCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -647,14 +576,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if len(tasks) != 0 { |
|
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} else { |
|
|
|
if !models.IsErrJobNotExist(err) { |
|
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -667,7 +596,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -676,7 +605,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -689,7 +618,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { |
|
|
|
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -697,14 +626,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -726,7 +655,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -769,7 +698,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|