diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 44b669181..bb1241247 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -87,7 +87,7 @@ type Cloudbrain struct { LogUrl string //日志输出的obs路径 PreVersionId int64 //父版本的版本id FlavorCode string //modelarts上的规格id - Description string //描述 + Description string `xorm:"varchar(256)"` //描述 WorkServerNumber int //节点数 FlavorName string //规格名称 EngineName string //引擎名称 diff --git a/modules/storage/obs.go b/modules/storage/obs.go index 39312bfde..9ca26b357 100755 --- a/modules/storage/obs.go +++ b/modules/storage/obs.go @@ -176,10 +176,10 @@ func ObsModelDownload(JobName string, fileName string) (io.ReadCloser, error) { } } -func GetObsListObject(jobName, parentDir string) ([]FileInfo, error) { +func GetObsListObject(jobName, parentDir string, versionOutputPath string) ([]FileInfo, error) { input := &obs.ListObjectsInput{} input.Bucket = setting.Bucket - input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, parentDir), "/") + input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, versionOutputPath, parentDir), "/") strPrefix := strings.Split(input.Prefix, "/") output, err := ObsCli.ListObjects(input) fileInfos := make([]FileInfo, 0) diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index c53e62efa..9637bae99 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -303,8 +303,7 @@ func ModelList(ctx *context.APIContext) { return } VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(task.TotalVersionCount) - parentDir = VersionOutputPath + "/" + parentDir - models, err := storage.GetObsListObject(task.JobName, parentDir) + models, err := storage.GetObsListObject(task.JobName, parentDir, VersionOutputPath) if err != nil { log.Info("get TrainJobListModel failed:", err) ctx.ServerError("GetObsListObject:", err) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index a1c754951..e35cf5c6b 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -477,6 +477,94 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { return nil } +func ErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { + ctx.Data["PageIsCloudBrain"] = true + var jobID = ctx.Params(":jobid") + // var versionName = ctx.Params(":version-name") + var versionName = ctx.Query("version_name") + + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) + return err + } + + t := time.Now() + var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["job_name"] = task.JobName + + attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) + if err != nil { + ctx.ServerError("GetAllUserAttachments failed:", err) + return err + } + ctx.Data["attachments"] = attachs + + var resourcePools modelarts.ResourcePool + if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["resource_pools"] = resourcePools.Info + + var engines modelarts.Engine + if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engines"] = engines.Info + + var versionInfos modelarts.VersionInfo + if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engine_versions"] = versionInfos.Version + + var flavorInfos modelarts.Flavor + if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["flavor_infos"] = flavorInfos.Info + + var Parameters modelarts.Parameters + if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + ctx.Data["train_url"] = outputObsPath + + Branches, err := ctx.Repo.GitRepo.GetBranches() + if err != nil { + ctx.ServerError("GetBranches error:", err) + return err + } + ctx.Data["branches"] = Branches + ctx.Data["description"] = form.Description + ctx.Data["dataset_name"] = task.DatasetName + ctx.Data["work_server_number"] = form.WorkServerNumber + ctx.Data["flavor_name"] = form.FlavorName + ctx.Data["engine_name"] = form.EngineName + ctx.Data["flavor_code"] = task.FlavorCode + ctx.Data["engine_id"] = task.EngineID + + ctx.Data["bootFile"] = form.BootFile + ctx.Data["uuid"] = form.Attachment + ctx.Data["branch_name"] = form.BranchName + configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) + if err != nil { + ctx.ServerError("getConfigList failed:", err) + return err + } + ctx.Data["config_list"] = configList.ParaConfigs + + return nil +} + func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { ctx.Data["PageIsTrainJob"] = true VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(modelarts.TotalVersionCount) @@ -492,6 +580,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath + // codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" @@ -529,13 +618,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) }); err != nil { log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) - - ctx.Data["bootFile"] = form.BootFile - ctx.Data["uuid"] = form.Attachment - ctx.Data["datasetName"] = attach.Name - ctx.Data["params"] = form.Params - ctx.Data["branch_name"] = branch_name - trainJobNewDataPrepare(ctx) ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsTrainJobNew, &form) return } @@ -555,7 +637,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } + // parentDir := VersionOutputPath + "/" if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + // if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form) @@ -651,6 +735,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) TotalVersionCount: modelarts.TotalVersionCount, } + //将params转换Parameters.Parameter,出错时返回给前端 + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return + } + err = modelarts.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) @@ -658,7 +749,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment ctx.Data["datasetName"] = attach.Name - ctx.Data["params"] = form.Params + ctx.Data["params"] = Parameters.Parameter ctx.Data["branch_name"] = branch_name ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return @@ -689,7 +780,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath - codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" @@ -701,16 +792,16 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if err := paramCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) - trainJobNewVersionDataPrepare(ctx) + ErrorDataPrepare(ctx, form) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) return } - attach, err := models.GetAttachmentByUUID(uuid) - if err != nil { - log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) - return - } + // attach, err := models.GetAttachmentByUUID(uuid) + // if err != nil { + // log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) + // return + // } //todo: del the codeLocalPath _, err = ioutil.ReadDir(codeLocalPath) @@ -724,13 +815,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ Branch: branch_name, }); err != nil { log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) - trainJobNewVersionDataPrepare(ctx) - - ctx.Data["bootFile"] = form.BootFile - ctx.Data["uuid"] = form.Attachment - ctx.Data["datasetName"] = attach.Name - ctx.Data["params"] = form.Params - ctx.Data["branch_name"] = branch_name + ErrorDataPrepare(ctx, form) ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form) return } @@ -738,21 +823,23 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ //todo: upload code (send to file_server todo this work?) if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) - trainJobNewVersionDataPrepare(ctx) + ErrorDataPrepare(ctx, form) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) return } if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) - trainJobNewVersionDataPrepare(ctx) + ErrorDataPrepare(ctx, form) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) return } - if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + parentDir := VersionOutputPath + "/" + // if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) - trainJobNewVersionDataPrepare(ctx) + ErrorDataPrepare(ctx, form) ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form) return } @@ -772,7 +859,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ err := json.Unmarshal([]byte(params), ¶meters) if err != nil { log.Error("Failed to Unmarshal params: %s (%v)", params, err) - trainJobNewVersionDataPrepare(ctx) + ErrorDataPrepare(ctx, form) ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) return } @@ -791,7 +878,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if isSaveParam == "on" { if form.ParameterTemplateName == "" { log.Error("ParameterTemplateName is empty") - trainJobNewVersionDataPrepare(ctx) + ErrorDataPrepare(ctx, form) ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) return } @@ -815,7 +902,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if err != nil { log.Error("Failed to CreateTrainJobConfig: %v", err) - trainJobNewVersionDataPrepare(ctx) + ErrorDataPrepare(ctx, form) ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) return } @@ -862,15 +949,11 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) - trainJobNewVersionDataPrepare(ctx) - ctx.Data["bootFile"] = form.BootFile - ctx.Data["uuid"] = form.Attachment - ctx.Data["datasetName"] = attach.Name - ctx.Data["params"] = form.Params + ErrorDataPrepare(ctx, form) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) return } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job/" + jobID) // ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) } @@ -963,7 +1046,6 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error { func TrainJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true - var jobID = ctx.Params(":jobid") repo := ctx.Repo.Repository diff --git a/templates/repo/modelarts/notebook/show.tmpl b/templates/repo/modelarts/notebook/show.tmpl index cac87df79..4850d9b22 100755 --- a/templates/repo/modelarts/notebook/show.tmpl +++ b/templates/repo/modelarts/notebook/show.tmpl @@ -11,7 +11,7 @@ {{.i18n.Tr "repo.cloudbrain"}}
/
- + {{$.i18n.Tr "repo.modelarts.notebook"}}
/
diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index e93a8000b..c480e9048 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -328,7 +328,7 @@
- {{.VersionCount}} + {{.VersionCount}}