|
|
@@ -343,10 +343,16 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
branchName := form.BranchName |
|
|
|
image := strings.TrimSpace(form.Image) |
|
|
|
tpl := tplGrampusTrainJobGPUNew |
|
|
|
|
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
if jobID != "" { |
|
|
|
tpl = tplGrampusTrainJobGPUVersionNew |
|
|
|
} |
|
|
|
|
|
|
|
if !jobNamePattern.MatchString(displayJobName) { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -354,14 +360,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil || !bootFileExist { |
|
|
|
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
errStr := checkSpecialPool(ctx, "GPU") |
|
|
|
if errStr != "" { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(errStr, tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -370,13 +376,13 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -385,7 +391,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := grampusParamCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -395,14 +401,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if len(tasks) != 0 { |
|
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} else { |
|
|
|
if !models.IsErrJobNotExist(err) { |
|
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -415,7 +421,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -425,7 +431,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -438,7 +444,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { |
|
|
|
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -447,7 +453,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -455,7 +461,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := mkModelPath(modelPath); err != nil { |
|
|
|
log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -463,7 +469,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -486,7 +492,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -528,7 +534,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
@@ -595,10 +601,17 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
isLatestVersion := modelarts.IsLatestVersion |
|
|
|
versionCount := modelarts.VersionCountOne |
|
|
|
engineName := form.EngineName |
|
|
|
tpl := tplGrampusTrainJobNPUNew |
|
|
|
|
|
|
|
//判断路由是否存在jobID,若存在,则说明是创建版本 |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
if jobID != "" { |
|
|
|
tpl = tplGrampusTrainJobNPUVersionNew |
|
|
|
} |
|
|
|
|
|
|
|
if !jobNamePattern.MatchString(displayJobName) { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -606,7 +619,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil || !bootFileExist { |
|
|
|
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -622,13 +635,13 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -637,7 +650,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := grampusParamCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -647,14 +660,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if len(tasks) != 0 { |
|
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} else { |
|
|
|
if !models.IsErrJobNotExist(err) { |
|
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
@@ -667,7 +680,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -676,7 +689,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -689,7 +702,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { |
|
|
|
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -697,14 +710,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -726,7 +739,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
@@ -769,7 +782,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|