|
|
@@ -477,6 +477,94 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func ErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
// var versionName = ctx.Params(":version-name") |
|
|
|
var versionName = ctx.Query("version_name") |
|
|
|
|
|
|
|
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
t := time.Now() |
|
|
|
var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["job_name"] = task.JobName |
|
|
|
|
|
|
|
attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetAllUserAttachments failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
|
|
|
|
var resourcePools modelarts.ResourcePool |
|
|
|
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["resource_pools"] = resourcePools.Info |
|
|
|
|
|
|
|
var engines modelarts.Engine |
|
|
|
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engines"] = engines.Info |
|
|
|
|
|
|
|
var versionInfos modelarts.VersionInfo |
|
|
|
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engine_versions"] = versionInfos.Version |
|
|
|
|
|
|
|
var flavorInfos modelarts.Flavor |
|
|
|
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["flavor_infos"] = flavorInfos.Info |
|
|
|
|
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["params"] = Parameters.Parameter |
|
|
|
|
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath |
|
|
|
ctx.Data["train_url"] = outputObsPath |
|
|
|
|
|
|
|
Branches, err := ctx.Repo.GitRepo.GetBranches() |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetBranches error:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["branches"] = Branches |
|
|
|
ctx.Data["description"] = form.Description |
|
|
|
ctx.Data["dataset_name"] = task.DatasetName |
|
|
|
ctx.Data["work_server_number"] = form.WorkServerNumber |
|
|
|
ctx.Data["flavor_name"] = form.FlavorName |
|
|
|
ctx.Data["engine_name"] = form.EngineName |
|
|
|
ctx.Data["flavor_code"] = task.FlavorCode |
|
|
|
ctx.Data["engine_id"] = task.EngineID |
|
|
|
|
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["branch_name"] = form.BranchName |
|
|
|
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("getConfigList failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["config_list"] = configList.ParaConfigs |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { |
|
|
|
ctx.Data["PageIsTrainJob"] = true |
|
|
|
VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(modelarts.TotalVersionCount) |
|
|
@@ -492,6 +580,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
isSaveParam := form.IsSaveParam |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath |
|
|
|
// codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" |
|
|
|
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath |
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" |
|
|
|
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" |
|
|
@@ -529,13 +618,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
}); err != nil { |
|
|
|
log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
|
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["datasetName"] = attach.Name |
|
|
|
ctx.Data["params"] = form.Params |
|
|
|
ctx.Data["branch_name"] = branch_name |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
@@ -555,7 +637,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
// parentDir := VersionOutputPath + "/" |
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
// if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form) |
|
|
@@ -651,6 +735,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
TotalVersionCount: modelarts.TotalVersionCount, |
|
|
|
} |
|
|
|
|
|
|
|
// Convert params into Parameters.Parameter so it can be returned to the front end when an error occurs |
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err := json.Unmarshal([]byte(params), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
err = modelarts.GenerateTrainJob(ctx, req) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
@@ -658,7 +749,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["datasetName"] = attach.Name |
|
|
|
ctx.Data["params"] = form.Params |
|
|
|
ctx.Data["params"] = Parameters.Parameter |
|
|
|
ctx.Data["branch_name"] = branch_name |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
@@ -689,7 +780,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
isSaveParam := form.IsSaveParam |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath |
|
|
|
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath |
|
|
|
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" |
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" |
|
|
|
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" |
|
|
|
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" |
|
|
@@ -701,16 +792,16 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
|
|
|
|
if err := paramCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
attach, err := models.GetAttachmentByUUID(uuid) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) |
|
|
|
return |
|
|
|
} |
|
|
|
// attach, err := models.GetAttachmentByUUID(uuid) |
|
|
|
// if err != nil { |
|
|
|
// log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) |
|
|
|
// return |
|
|
|
// } |
|
|
|
|
|
|
|
//todo: del the codeLocalPath |
|
|
|
_, err = ioutil.ReadDir(codeLocalPath) |
|
|
@@ -724,13 +815,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
Branch: branch_name, |
|
|
|
}); err != nil { |
|
|
|
log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
|
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["datasetName"] = attach.Name |
|
|
|
ctx.Data["params"] = form.Params |
|
|
|
ctx.Data["branch_name"] = branch_name |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
@@ -738,21 +823,23 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
//todo: upload code (send to file_server todo this work?) |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
parentDir := VersionOutputPath + "/" |
|
|
|
// if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
@@ -772,7 +859,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal params: %s (%v)", params, err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
@@ -791,7 +878,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
if isSaveParam == "on" { |
|
|
|
if form.ParameterTemplateName == "" { |
|
|
|
log.Error("ParameterTemplateName is empty") |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
@@ -815,7 +902,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to CreateTrainJobConfig: %v", err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
@@ -862,15 +949,11 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["datasetName"] = attach.Name |
|
|
|
ctx.Data["params"] = form.Params |
|
|
|
ErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job/" + jobID) |
|
|
|
// ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) |
|
|
|
} |
|
|
|
|
|
|
@@ -963,7 +1046,6 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error { |
|
|
|
|
|
|
|
func TrainJobShow(ctx *context.Context) { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
|
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
|
|
|
|
repo := ctx.Repo.Repository |
|
|
|