Browse Source

Merge remote-tracking branch 'origin/fix-2817' into pretrainmodel

pull/2899/head
liuzx 2 years ago
parent
commit
d0d252eb86
2 changed files with 50 additions and 30 deletions
  1. +7
    -0
      routers/repo/cloudbrain.go
  2. +43
    -30
      routers/repo/grampus.go

+ 7
- 0
routers/repo/cloudbrain.go View File

@@ -157,6 +157,7 @@ func cloudBrainVersionNewDataPrepare(ctx *context.Context) error {
log.Info("spec_id = %d", spec.ID)
ctx.Data["spec_id"] = spec.ID
}
prepareCloudbrainOneSpecs(ctx)
var Parameters modelarts.Parameters
if err := json.Unmarshal([]byte(ctx.Cloudbrain.Parameters), &Parameters); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
@@ -248,6 +249,12 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
tpl = tplCloudBrainTrainJobNew
}

var jobID = ctx.Params(":jobid")
log.Info("jobIDTest= %s", jobID)
// if jobID != "" {
// tpl = tplCloudBrainTrainJobVersionNew
// }

tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
if err == nil {
if len(tasks) != 0 {


+ 43
- 30
routers/repo/grampus.go View File

@@ -343,10 +343,16 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/"
branchName := form.BranchName
image := strings.TrimSpace(form.Image)
tpl := tplGrampusTrainJobGPUNew

var jobID = ctx.Params(":jobid")
if jobID != "" {
tpl = tplGrampusTrainJobGPUVersionNew
}

if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form)
return
}

@@ -354,14 +360,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil || !bootFileExist {
log.Error("Get bootfile error:", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form)
return
}

errStr := checkSpecialPool(ctx, "GPU")
if errStr != "" {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(errStr, tpl, &form)
return
}

@@ -370,13 +376,13 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr("system error", tpl, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form)
return
}
}
@@ -385,7 +391,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := grampusParamCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(err.Error(), tpl, &form)
return
}

@@ -395,14 +401,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr("the job name did already exist", tpl, &form)
return
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr("system error", tpl, &form)
return
}
}
@@ -415,7 +421,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
})
if err != nil || spec == nil {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr("Resource specification not available", tpl, &form)
return
}

@@ -425,7 +431,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return
}

@@ -438,7 +444,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return
}

@@ -447,7 +453,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return
}

@@ -455,7 +461,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := mkModelPath(modelPath); err != nil {
log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return
}

@@ -463,7 +469,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return
}

@@ -486,7 +492,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr("Create task failed, internal error", tpl, &form)
return
}

@@ -528,7 +534,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(err.Error(), tpl, &form)
return
}
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
@@ -595,10 +601,17 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
isLatestVersion := modelarts.IsLatestVersion
versionCount := modelarts.VersionCountOne
engineName := form.EngineName
tpl := tplGrampusTrainJobNPUNew

//判断路由是否存在jobID,若存在,则说明是创建版本
var jobID = ctx.Params(":jobid")
if jobID != "" {
tpl = tplGrampusTrainJobNPUVersionNew
}

if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form)
return
}

@@ -606,7 +619,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil || !bootFileExist {
log.Error("Get bootfile error:", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form)
return
}

@@ -622,13 +635,13 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr("system error", tpl, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form)
return
}
}
@@ -637,7 +650,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := grampusParamCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(err.Error(), tpl, &form)
return
}

@@ -647,14 +660,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr("the job name did already exist", tpl, &form)
return
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr("system error", tpl, &form)
return
}
}
@@ -667,7 +680,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
})
if err != nil || spec == nil {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr("Resource specification not available", tpl, &form)
return
}

@@ -676,7 +689,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return
}

@@ -689,7 +702,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return
}

@@ -697,14 +710,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return
}

if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return
}

@@ -726,7 +739,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr("Create task failed, internal error", tpl, &form)
return
}

@@ -769,7 +782,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error())
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(err.Error(), tpl, &form)
return
}
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")


Loading…
Cancel
Save