From d6a1887e655a6f2fb4df3b36c672e8988951e8a3 Mon Sep 17 00:00:00 2001 From: liuzx Date: Thu, 16 Dec 2021 17:06:27 +0800 Subject: [PATCH 001/108] =?UTF-8?q?=E6=8E=A8=E7=90=86=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/auth/modelarts.go | 21 + modules/modelarts/modelarts.go | 98 +++++ routers/repo/modelarts.go | 531 ++++++++++++++++++++++- routers/routes/routes.go | 12 + templates/repo/modelarts/inferencejob/index.tmpl | 1 + templates/repo/modelarts/inferencejob/new.tmpl | 1 + templates/repo/modelarts/inferencejob/show.tmpl | 1 + 7 files changed, 655 insertions(+), 10 deletions(-) create mode 100644 templates/repo/modelarts/inferencejob/index.tmpl create mode 100644 templates/repo/modelarts/inferencejob/new.tmpl create mode 100644 templates/repo/modelarts/inferencejob/show.tmpl diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 59f72696e..a34c87870 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -45,6 +45,27 @@ type CreateModelArtsTrainJobForm struct { EngineName string `form:"engine_names" binding:"Required"` } +type CreateModelArtsInferenceJobForm struct { + JobName string `form:"job_name" binding:"Required"` + Attachment string `form:"attachment" binding:"Required"` + BootFile string `form:"boot_file" binding:"Required"` + WorkServerNumber int `form:"work_server_number" binding:"Required"` + EngineID int `form:"engine_id" binding:"Required"` + PoolID string `form:"pool_id" binding:"Required"` + Flavor string `form:"flavor" binding:"Required"` + Params string `form:"run_para_list" binding:"Required"` + Description string `form:"description"` + IsSaveParam string `form:"is_save_para"` + ParameterTemplateName string `form:"parameter_template_name"` + PrameterDescription string `form:"parameter_description"` + BranchName string `form:"branch_name" binding:"Required"` + VersionName string `form:"version_name" binding:"Required"` + FlavorName string `form:"flaver_names" binding:"Required"` + EngineName string `form:"engine_names" binding:"Required"` + TrainUrl string `form:"train_url" binding:"Required"` + CkptName string `form:"ckpt_name" binding:"Required"` +} + func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { return validate(errs, ctx.Data, f, ctx.Locale) } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 273d59012..c0a003450 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -38,6 +38,7 @@ const ( // "]}" CodePath = "/code/" OutputPath = "/output/" + ResultPath = "/result/" LogPath = "/log/" JobPath = "/job/" OrderDesc = "desc" //向下查询 @@ -117,6 +118,33 @@ type GenerateTrainJobVersionReq struct { TotalVersionCount int } +type GenerateInferenceJobReq struct { + JobName string + Uuid string + Description string + CodeObsPath string + BootFile string + BootFileUrl string + DataUrl string + TrainUrl string + FlavorCode string + LogUrl string + PoolID string + WorkServerNumber int + EngineID int64 + Parameters []models.Parameter + CommitID string + IsLatestVersion string + Params string + BranchName string + PreVersionId int64 + PreVersionName string + FlavorName string + VersionCount int + EngineName string + TotalVersionCount int +} + type VersionInfo struct { Version []struct { ID int `json:"id"` @@ -449,3 +477,73 @@ func GetVersionOutputPathByTotalVersionCount(TotalVersionCount int) (VersionOutp VersionOutputPath = "V" + talVersionCountToString return VersionOutputPath } + +func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) { + jobResult, err := createTrainJob(models.CreateTrainJobParams{ + JobName: req.JobName, + Description: req.Description, + Config: models.Config{ + WorkServerNum: req.WorkServerNumber, + AppUrl: req.CodeObsPath, + BootFileUrl: req.BootFileUrl, + DataUrl: req.DataUrl, + EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + LogUrl: req.LogUrl, + PoolID: req.PoolID, + CreateVersion: true, + Flavor: models.Flavor{ + Code: req.FlavorCode, + }, + Parameter: req.Parameters, + }, + }) + if err != nil { + log.Error("CreateJob failed: %v", err.Error()) + return err + } + + attach, err := models.GetAttachmentByUUID(req.Uuid) + if err != nil { + log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) + return err + } + + err = models.CreateCloudbrain(&models.Cloudbrain{ + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: strconv.FormatInt(jobResult.JobID, 10), + JobName: req.JobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: attach.Name, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: NPUResource, + EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + LogUrl: req.LogUrl, + FlavorCode: req.FlavorCode, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + FlavorName: req.FlavorName, + EngineName: req.EngineName, + VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, + }) + + if err != nil { + log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) + return err + } + + return nil +} diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 920205334..c19eacb7a 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -4,6 +4,7 @@ import ( "encoding/json" "errors" "io" + "io/ioutil" "net/http" "os" "path" @@ -38,6 +39,10 @@ const ( tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new" tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show" tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new" + + tplModelArtsInferenceJobIndex base.TplName = "repo/modelarts/inferencejob/index" + tplModelArtsInferenceJobNew base.TplName = "repo/modelarts/inferencejob/new" + tplModelArtsInferenceJobShow base.TplName = "repo/modelarts/inferencejob/show" ) func DebugJobIndex(ctx *context.Context) { @@ -737,11 +742,16 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) // } //todo: del the codeLocalPath - // _, err := ioutil.ReadDir(codeLocalPath) - // if err == nil { - // os.RemoveAll(codeLocalPath) - // } - os.RemoveAll(codeLocalPath) + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } else { + log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) + versionErrorDataPrepare(ctx, form) + ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) + return + } + // os.RemoveAll(codeLocalPath) gitRepo, _ := git.OpenRepository(repo.RepoPath()) commitID, _ := gitRepo.GetBranchCommitID(branch_name) @@ -954,11 +964,16 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ // } //todo: del the codeLocalPath - // _, err = ioutil.ReadDir(codeLocalPath) - // if err == nil { - // os.RemoveAll(codeLocalPath) - // } - os.RemoveAll(codeLocalPath) + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } else { + log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) + versionErrorDataPrepare(ctx, form) + ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) + return + } + // os.RemoveAll(codeLocalPath) gitRepo, _ := git.OpenRepository(repo.RepoPath()) commitID, _ := gitRepo.GetBranchCommitID(branch_name) @@ -1196,6 +1211,20 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error { return nil } +func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) error { + if !strings.HasSuffix(form.BootFile, ".py") { + log.Error("the boot file(%s) must be a python file", form.BootFile) + return errors.New("启动文件必须是python文件") + } + + if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 { + log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber) + return errors.New("计算节点数必须在1-25之间") + } + + return nil +} + func TrainJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true var jobID = ctx.Params(":jobid") @@ -1474,3 +1503,485 @@ func ModelDownload(ctx *context.Context) { } http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) } + +func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) { + ctx.Data["PageIsTrainJob"] = true + jobName := form.JobName + uuid := form.Attachment + description := form.Description + workServerNumber := form.WorkServerNumber + engineID := form.EngineID + bootFile := form.BootFile + flavorCode := form.Flavor + params := form.Params + poolID := form.PoolID + isSaveParam := form.IsSaveParam + repo := ctx.Repo.Repository + codeLocalPath := setting.JobPath + jobName + modelarts.CodePath + codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" + branch_name := form.BranchName + isLatestVersion := modelarts.IsLatestVersion + FlavorName := form.FlavorName + VersionCount := modelarts.VersionCount + EngineName := form.EngineName + trainUrl := form.TrainUrl + ckptName := form.CkptName + + count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) + if err != nil { + log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("system error", tplModelArtsInferenceJobNew, &form) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsInferenceJobNew, &form) + return + } + } + + if err := paramCheckCreateInferenceJob(form); err != nil { + log.Error("paramCheckCreateInferenceJob failed:(%v)", err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form) + return + } + + //todo: del the codeLocalPath + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } else { + log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsInferenceJobNew, &form) + return + } + // os.RemoveAll(codeLocalPath) + + gitRepo, _ := git.OpenRepository(repo.RepoPath()) + commitID, _ := gitRepo.GetBranchCommitID(branch_name) + + if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ + Branch: branch_name, + }); err != nil { + log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsInferenceJobNew, &form) + return + } + + //todo: upload code (send to file_server todo this work?) + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath); err != nil { + log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("Failed to obsMkdir_result", tplModelArtsInferenceJobNew, &form) + return + } + + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { + log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsInferenceJobNew, &form) + return + } + + // parentDir := VersionOutputPath + "/" + if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + // if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { + log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsInferenceJobNew, &form) + return + } + + //todo: del local code? + + var parameters models.Parameters + param := make([]models.Parameter, 0) + param = append(param, models.Parameter{ + Label: modelarts.TrainUrl, + Value: trainUrl, + }, models.Parameter{ + Label: modelarts.DataUrl, + Value: dataPath, + }) + if len(params) != 0 { + err := json.Unmarshal([]byte(params), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("运行参数错误", tplModelArtsInferenceJobNew, &form) + return + } + + for _, parameter := range parameters.Parameter { + if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { + param = append(param, models.Parameter{ + Label: parameter.Label, + Value: parameter.Value, + }) + } + } + } + + req := &modelarts.GenerateInferenceJobReq{ + JobName: jobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: trainUrl, + FlavorCode: flavorCode, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Uuid: uuid, + Parameters: parameters.Parameter, + CommitID: commitID, + IsLatestVersion: isLatestVersion, + BranchName: branch_name, + Params: form.Params, + FlavorName: FlavorName, + EngineName: EngineName, + VersionCount: VersionCount, + TotalVersionCount: modelarts.TotalVersionCount, + } + + //将params转换Parameters.Parameter,出错时返回给前端 + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return + } + + err = modelarts.GenerateTrainJob(ctx, req) + if err != nil { + log.Error("GenerateTrainJob failed:%v", err.Error()) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form) + return + } + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") +} +func InferenceJobIndex(ctx *context.Context) { + MustEnableModelArts(ctx) + + repo := ctx.Repo.Repository + page := ctx.QueryInt("page") + if page <= 0 { + page = 1 + } + + tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ + ListOptions: models.ListOptions{ + Page: page, + PageSize: setting.UI.IssuePagingNum, + }, + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobType: string(models.JobTypeTrain), + IsLatestVersion: modelarts.IsLatestVersion, + }) + if err != nil { + ctx.ServerError("Cloudbrain", err) + return + } + + for i, task := range tasks { + tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) + tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) + } + + pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) + pager.SetDefaultParams(ctx) + ctx.Data["Page"] = pager + + ctx.Data["PageIsCloudBrain"] = true + ctx.Data["Tasks"] = tasks + ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx) + ctx.Data["RepoIsEmpty"] = repo.IsEmpty + ctx.HTML(200, tplModelArtsInferenceJobIndex) +} +func InferenceJobNew(ctx *context.Context) { + err := inferenceJobNewDataPrepare(ctx) + if err != nil { + ctx.ServerError("get new inference-job info failed", err) + return + } + ctx.HTML(200, tplModelArtsInferenceJobNew) +} +func inferenceJobNewDataPrepare(ctx *context.Context) error { + ctx.Data["PageIsCloudBrain"] = true + + t := time.Now() + var jobName = "inference" + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["job_name"] = jobName + + attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) + if err != nil { + ctx.ServerError("GetAllUserAttachments failed:", err) + return err + } + ctx.Data["attachments"] = attachs + + var resourcePools modelarts.ResourcePool + if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["resource_pools"] = resourcePools.Info + + var engines modelarts.Engine + if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engines"] = engines.Info + + var versionInfos modelarts.VersionInfo + if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engine_versions"] = versionInfos.Version + + var flavorInfos modelarts.Flavor + if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["flavor_infos"] = flavorInfos.Info + + resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath + ctx.Data["result_url"] = resultObsPath + ctx.Data["params"] = "" + ctx.Data["branchName"] = ctx.Repo.BranchName + + configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) + if err != nil { + ctx.ServerError("getConfigList failed:", err) + return err + } + ctx.Data["config_list"] = configList.ParaConfigs + + return nil +} + +func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) error { + ctx.Data["PageIsCloudBrain"] = true + + t := time.Now() + var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["job_name"] = jobName + + attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) + if err != nil { + ctx.ServerError("GetAllUserAttachments failed:", err) + return err + } + ctx.Data["attachments"] = attachs + + var resourcePools modelarts.ResourcePool + if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["resource_pools"] = resourcePools.Info + + var engines modelarts.Engine + if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engines"] = engines.Info + + var versionInfos modelarts.VersionInfo + if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engine_versions"] = versionInfos.Version + + var flavorInfos modelarts.Flavor + if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["flavor_infos"] = flavorInfos.Info + + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + ctx.Data["train_url"] = outputObsPath + + configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) + if err != nil { + ctx.ServerError("getConfigList failed:", err) + return err + } + var Parameters modelarts.Parameters + if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["config_list"] = configList.ParaConfigs + ctx.Data["bootFile"] = form.BootFile + ctx.Data["uuid"] = form.Attachment + ctx.Data["branch_name"] = form.BranchName + + return nil +} +func InferenceJobShow(ctx *context.Context) { + ctx.Data["PageIsCloudBrain"] = true + var jobID = ctx.Params(":jobid") + + repo := ctx.Repo.Repository + page := ctx.QueryInt("page") + if page <= 0 { + page = 1 + } + VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ + ListOptions: models.ListOptions{ + Page: page, + PageSize: setting.UI.IssuePagingNum, + }, + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobType: string(models.JobTypeTrain), + JobID: jobID, + }) + + if err != nil { + log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) + return + } + //设置权限 + canNewJob, err := canUserCreateTrainJobVersion(ctx, VersionListTasks[0].UserID) + if err != nil { + ctx.ServerError("canNewJob failed", err) + return + } + ctx.Data["canNewJob"] = canNewJob + + //将运行参数转化为epoch_size = 3, device_target = Ascend的格式 + for i, _ := range VersionListTasks { + + var parameters models.Parameters + + err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err) + trainJobNewDataPrepare(ctx) + return + } + + if len(parameters.Parameter) > 0 { + paramTemp := "" + for _, Parameter := range parameters.Parameter { + param := Parameter.Label + " = " + Parameter.Value + "; " + paramTemp = paramTemp + param + } + VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2] + } else { + VersionListTasks[i].Parameters = "" + } + } + + pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) + pager.SetDefaultParams(ctx) + ctx.Data["Page"] = pager + ctx.Data["jobID"] = jobID + ctx.Data["jobName"] = VersionListTasks[0].JobName + ctx.Data["version_list_task"] = VersionListTasks + ctx.Data["version_list_count"] = VersionListCount + ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) +} +func InferenceJobStop(ctx *context.Context) { + var jobID = ctx.Params(":jobid") + task, err := models.GetCloudbrainByJobID(jobID) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + return + } + + _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) + if err != nil { + log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + return + } + + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") +} + +func InferenceJobDel(ctx *context.Context) { + var jobID = ctx.Params(":jobid") + repo := ctx.Repo.Repository + + VersionListTasks, _, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobType: string(models.JobTypeTrain), + JobID: jobID, + }) + if err != nil { + ctx.ServerError("get VersionListTasks failed", err) + return + } + + //删除modelarts上的任务记录 + _, err = modelarts.DelTrainJob(jobID) + if err != nil { + log.Error("DelTrainJob(%s) failed:%v", jobID, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + return + } + + //删除数据库Cloudbrain表的记录 + for _, task := range VersionListTasks { + err = models.DeleteJob(&task.Cloudbrain) + if err != nil { + ctx.ServerError("DeleteJob failed", err) + return + } + } + + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") +} + +func ResultDownload(ctx *context.Context) { + var ( + err error + ) + + var jobID = ctx.Params(":jobid") + versionName := ctx.Query("version_name") + parentDir := ctx.Query("parent_dir") + fileName := ctx.Query("file_name") + log.Info("DownloadSingleModelFile start.") + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + + path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName, parentDir, fileName), "/") + log.Info("Download path is:%s", path) + + url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) + if err != nil { + log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"]) + ctx.ServerError("GetObsCreateSignedUrl", err) + return + } + http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) +} diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 30e486b98..05ae5fbef 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1022,6 +1022,18 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) }) + + m.Group("/inference-job", func() { + m.Get("", reqRepoCloudBrainReader, repo.InferenceJobIndex) + m.Group("/:jobid", func() { + m.Get("", reqRepoCloudBrainReader, repo.InferenceJobShow) + m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.InferenceJobStop) + m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.InferenceJobDel) + m.Get("/model_download", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.ResultDownload) + }) + m.Get("/create", reqRepoCloudBrainWriter, repo.InferenceJobNew) + m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) + }) }, context.RepoRef()) m.Group("/blockchain", func() { diff --git a/templates/repo/modelarts/inferencejob/index.tmpl b/templates/repo/modelarts/inferencejob/index.tmpl new file mode 100644 index 000000000..30d74d258 --- /dev/null +++ b/templates/repo/modelarts/inferencejob/index.tmpl @@ -0,0 +1 @@ +test \ No newline at end of file diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl new file mode 100644 index 000000000..30d74d258 --- /dev/null +++ b/templates/repo/modelarts/inferencejob/new.tmpl @@ -0,0 +1 @@ +test \ No newline at end of file diff --git a/templates/repo/modelarts/inferencejob/show.tmpl b/templates/repo/modelarts/inferencejob/show.tmpl new file mode 100644 index 000000000..30d74d258 --- /dev/null +++ b/templates/repo/modelarts/inferencejob/show.tmpl @@ -0,0 +1 @@ +test \ No newline at end of file From 303db82ac04cbac982f367168025e5a5b2fc7d1a Mon Sep 17 00:00:00 2001 From: liuzx Date: Wed, 22 Dec 2021 16:20:12 +0800 Subject: [PATCH 002/108] update --- models/cloudbrain.go | 8 +++- modules/auth/modelarts.go | 2 + modules/modelarts/modelarts.go | 105 +++++++++++++++++++++-------------------- routers/repo/modelarts.go | 59 ++++++++++------------- 4 files changed, 87 insertions(+), 87 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 509f4a9ed..8927147af 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -30,6 +30,7 @@ const ( JobTypeSnn4imagenet JobType = "SNN4IMAGENET" JobTypeBrainScore JobType = "BRAINSCORE" JobTypeTrain JobType = "TRAIN" + JobTypeInference JobType = "INFERENCE" //notebook ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" //免费资源创建排队中 @@ -105,7 +106,7 @@ type Cloudbrain struct { ComputeResource string //计算资源,例如npu EngineID int64 //引擎id - TrainUrl string //输出的obs路径 + TrainUrl string //输出模型的obs路径 BranchName string //分支名称 Parameters string //传给modelarts的param参数 BootFile string //启动文件 @@ -119,6 +120,11 @@ type Cloudbrain struct { EngineName string //引擎名称 TotalVersionCount int //任务的所有版本数量,包括删除的 + ModelName string //模型名称 + ModelVersion string //模型版本 + CkptName string //权重文件名称 + ResultUrl string //推理结果的obs路径 + User *User `xorm:"-"` Repo *Repository `xorm:"-"` } diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index a34c87870..7d727f182 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -63,6 +63,8 @@ type CreateModelArtsInferenceJobForm struct { FlavorName string `form:"flaver_names" binding:"Required"` EngineName string `form:"engine_names" binding:"Required"` TrainUrl string `form:"train_url" binding:"Required"` + ModelName string `form:"model_name" binding:"Required"` + ModelVersion string `form:"model_version" binding:"Required"` CkptName string `form:"ckpt_name" binding:"Required"` } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index c0a003450..1225ea82b 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -46,6 +46,8 @@ const ( Lines = 500 TrainUrl = "train_url" DataUrl = "data_url" + ResultUrl = "result_url" + CkptName = "ckptName" PerPage = 10 IsLatestVersion = "1" NotLatestVersion = "0" @@ -119,30 +121,29 @@ type GenerateTrainJobVersionReq struct { } type GenerateInferenceJobReq struct { - JobName string - Uuid string - Description string - CodeObsPath string - BootFile string - BootFileUrl string - DataUrl string - TrainUrl string - FlavorCode string - LogUrl string - PoolID string - WorkServerNumber int - EngineID int64 - Parameters []models.Parameter - CommitID string - IsLatestVersion string - Params string - BranchName string - PreVersionId int64 - PreVersionName string - FlavorName string - VersionCount int - EngineName string - TotalVersionCount int + JobName string + Uuid string + Description string + CodeObsPath string + BootFile string + BootFileUrl string + DataUrl string + TrainUrl string + FlavorCode string + LogUrl string + PoolID string + WorkServerNumber int + EngineID int64 + Parameters []models.Parameter + CommitID string + Params string + BranchName string + FlavorName string + EngineName string + ModelName string + ModelVersion string + CkptName string + ResultUrl string } type VersionInfo struct { @@ -510,34 +511,34 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e } err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: strconv.FormatInt(jobResult.JobID, 10), - JobName: req.JobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeCloudBrainTwo, - VersionID: jobResult.VersionID, - VersionName: jobResult.VersionName, - Uuid: req.Uuid, - DatasetName: attach.Name, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: NPUResource, - EngineID: req.EngineID, - TrainUrl: req.TrainUrl, - BranchName: req.BranchName, - Parameters: req.Params, - BootFile: req.BootFile, - DataUrl: req.DataUrl, - LogUrl: req.LogUrl, - FlavorCode: req.FlavorCode, - Description: req.Description, - WorkServerNumber: req.WorkServerNumber, - FlavorName: req.FlavorName, - EngineName: req.EngineName, - VersionCount: req.VersionCount, - TotalVersionCount: req.TotalVersionCount, + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: strconv.FormatInt(jobResult.JobID, 10), + JobName: req.JobName, + JobType: string(models.JobTypeInference), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: attach.Name, + CommitID: req.CommitID, + EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + LogUrl: req.LogUrl, + FlavorCode: req.FlavorCode, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + FlavorName: req.FlavorName, + EngineName: req.EngineName, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + CkptName: req.CkptName, + ResultUrl: req.ResultUrl, }) if err != nil { diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 6dbe6740b..0ae433654 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1259,7 +1259,7 @@ func TrainJobShow(ctx *context.Context) { ctx.Data["canNewJob"] = canNewJob //将运行参数转化为epoch_size = 3, device_target = Ascend的格式 - for i, task := range VersionListTasks { + for i, _ := range VersionListTasks { var parameters models.Parameters @@ -1280,9 +1280,6 @@ func TrainJobShow(ctx *context.Context) { } else { VersionListTasks[i].Parameters = "" } - - VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) - VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) } pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) @@ -1518,7 +1515,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference flavorCode := form.Flavor params := form.Params poolID := form.PoolID - isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath @@ -1526,9 +1522,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName - isLatestVersion := modelarts.IsLatestVersion FlavorName := form.FlavorName - VersionCount := modelarts.VersionCount EngineName := form.EngineName trainUrl := form.TrainUrl ckptName := form.CkptName @@ -1608,11 +1602,11 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference var parameters models.Parameters param := make([]models.Parameter, 0) param = append(param, models.Parameter{ - Label: modelarts.TrainUrl, - Value: trainUrl, + Label: modelarts.ResultUrl, + Value: "s3:/" + resultObsPath, }, models.Parameter{ - Label: modelarts.DataUrl, - Value: dataPath, + Label: modelarts.CkptName, + Value: ckptName, }) if len(params) != 0 { err := json.Unmarshal([]byte(params), ¶meters) @@ -1634,28 +1628,25 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference } req := &modelarts.GenerateInferenceJobReq{ - JobName: jobName, - DataUrl: dataPath, - Description: description, - CodeObsPath: codeObsPath, - BootFileUrl: codeObsPath + bootFile, - BootFile: bootFile, - TrainUrl: trainUrl, - FlavorCode: flavorCode, - WorkServerNumber: workServerNumber, - EngineID: int64(engineID), - LogUrl: logObsPath, - PoolID: poolID, - Uuid: uuid, - Parameters: parameters.Parameter, - CommitID: commitID, - IsLatestVersion: isLatestVersion, - BranchName: branch_name, - Params: form.Params, - FlavorName: FlavorName, - EngineName: EngineName, - VersionCount: VersionCount, - TotalVersionCount: modelarts.TotalVersionCount, + JobName: jobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: trainUrl, + FlavorCode: flavorCode, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Uuid: uuid, + Parameters: param, //modelarts训练时用到 + CommitID: commitID, + BranchName: branch_name, + Params: form.Params, + FlavorName: FlavorName, + EngineName: EngineName, } //将params转换Parameters.Parameter,出错时返回给前端 @@ -1665,7 +1656,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference return } - err = modelarts.GenerateTrainJob(ctx, req) + err = modelarts.GenerateInferenceJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) inferenceJobErrorNewDataPrepare(ctx, form) From 84e189afc1499abdf0cd271134cd7622a3bf6af7 Mon Sep 17 00:00:00 2001 From: liuzx Date: Wed, 22 Dec 2021 19:41:08 +0800 Subject: [PATCH 003/108] update --- routers/repo/modelarts.go | 24 +++++++++++++----------- routers/routes/routes.go | 3 ++- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 21dcdc593..a981d2182 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -331,7 +331,7 @@ func NotebookDel(ctx *context.Context) { return } - if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped){ + if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) { log.Error("the job(%s) has not been stopped", task.JobName) ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped")) return @@ -798,12 +798,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) _, err = ioutil.ReadDir(codeLocalPath) if err == nil { os.RemoveAll(codeLocalPath) - } else { - log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) - versionErrorDataPrepare(ctx, form) - ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) - return } + // } else { + // log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) + // versionErrorDataPrepare(ctx, form) + // ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) + // return + // } // os.RemoveAll(codeLocalPath) gitRepo, _ := git.OpenRepository(repo.RepoPath()) @@ -1606,12 +1607,13 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference _, err = ioutil.ReadDir(codeLocalPath) if err == nil { os.RemoveAll(codeLocalPath) - } else { - log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) - inferenceJobErrorNewDataPrepare(ctx, form) - ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsInferenceJobNew, &form) - return } + // } else { + // log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) + // inferenceJobErrorNewDataPrepare(ctx, form) + // ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsInferenceJobNew, &form) + // return + // } // os.RemoveAll(codeLocalPath) gitRepo, _ := git.OpenRepository(repo.RepoPath()) diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 55ee2b750..4d5fb640d 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1022,7 +1022,8 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/create_version", cloudbrain.AdminOrJobCreaterRight, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) }) m.Get("/create", reqRepoCloudBrainWriter, repo.TrainJobNew) - m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) + // m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) + m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) }) From 028f37c81ca970e6558760e3a0e20f26185ab91d Mon Sep 17 00:00:00 2001 From: liuzx Date: Thu, 23 Dec 2021 16:36:38 +0800 Subject: [PATCH 004/108] update --- routers/routes/routes.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 4d5fb640d..55ee2b750 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1022,8 +1022,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/create_version", cloudbrain.AdminOrJobCreaterRight, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) }) m.Get("/create", reqRepoCloudBrainWriter, repo.TrainJobNew) - // m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) - m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) + m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) }) From c428c5b54deeb699e39a98e44948d6d8cecbcbf5 Mon Sep 17 00:00:00 2001 From: zouap Date: Thu, 23 Dec 2021 16:48:57 +0800 Subject: [PATCH 005/108] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E6=8E=A8=E7=90=86?= =?UTF-8?q?=E7=9A=84=E4=B8=A4=E4=B8=AA=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zouap --- routers/repo/ai_model_manage.go | 50 +++++++++++++++++++++++++++++++++++++++++ routers/routes/routes.go | 2 ++ 2 files changed, 52 insertions(+) diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 669bdf9fa..22b551206 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -586,3 +586,53 @@ func ModifyModelInfo(ctx *context.Context) { } } + +func QueryModelListForPredict(ctx *context.Context) { + repoId := ctx.Repo.Repository.ID + modelResult, count, err := models.QueryModel(&models.AiModelQueryOptions{ + ListOptions: models.ListOptions{ + Page: -1, + PageSize: -1, + }, + RepoID: repoId, + Type: -1, + New: -1, + }) + if err != nil { + ctx.ServerError("Cloudbrain", err) + return + } + log.Info("query return count=" + fmt.Sprint(count)) + + nameList := make([]string, 0) + + nameMap := make(map[string][]*models.AiModelManage) + for _, model := range modelResult { + if _, value := nameMap[model.Name]; !value { + models := make([]*models.AiModelManage, 0) + models = append(models, model) + nameMap[model.Name] = models + nameList = append(nameList, model.Name) + } else { + nameMap[model.Name] = append(nameMap[model.Name], model) + } + } + + mapInterface := make(map[string]interface{}) + mapInterface["nameList"] = nameList + mapInterface["nameMap"] = nameMap + ctx.JSON(http.StatusOK, mapInterface) +} + +func QueryModelFileForPredict(ctx *context.Context) { + id := ctx.Query("ID") + model, err := models.QueryModelById(id) + if err != nil { + log.Error("no such model!", err.Error()) + ctx.ServerError("no such model:", err) + return + } + prefix := model.Path[len(setting.Bucket)+2:] + fileinfos, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, prefix) + ctx.JSON(http.StatusOK, fileinfos) +} diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 55ee2b750..a89704234 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -988,6 +988,8 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/show_model_child_api", repo.ShowOneVersionOtherModel) m.Get("/query_train_job", reqRepoCloudBrainReader, repo.QueryTrainJobList) m.Get("/query_train_job_version", reqRepoCloudBrainReader, repo.QueryTrainJobVersionList) + m.Get("/query_model_for_predict", reqRepoCloudBrainReader, repo.QueryModelListForPredict) + m.Get("/query_modelfile_for_predict", reqRepoCloudBrainReader, repo.QueryModelFileForPredict) m.Group("/:ID", func() { m.Get("", repo.ShowSingleModel) m.Get("/downloadsingle", repo.DownloadSingleModelFile) From 5b110544d644eb06178a9fce78591afa08bb33fa Mon Sep 17 00:00:00 2001 From: liuzx Date: Thu, 23 Dec 2021 17:19:37 +0800 Subject: [PATCH 006/108] update --- routers/repo/ai_model_manage.go | 50 ------------------ routers/repo/modelarts.go | 110 ++++++++++++++++------------------------ routers/routes/routes.go | 2 - 3 files changed, 43 insertions(+), 119 deletions(-) diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 22b551206..669bdf9fa 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -586,53 +586,3 @@ func ModifyModelInfo(ctx *context.Context) { } } - -func QueryModelListForPredict(ctx *context.Context) { - repoId := ctx.Repo.Repository.ID - modelResult, count, err := models.QueryModel(&models.AiModelQueryOptions{ - ListOptions: models.ListOptions{ - Page: -1, - PageSize: -1, - }, - RepoID: repoId, - Type: -1, - New: -1, - }) - if err != nil { - ctx.ServerError("Cloudbrain", err) - return - } - log.Info("query return count=" + fmt.Sprint(count)) - - nameList := make([]string, 0) - - nameMap := make(map[string][]*models.AiModelManage) - for _, model := range modelResult { - if _, value := nameMap[model.Name]; !value { - models := make([]*models.AiModelManage, 0) - models = append(models, model) - nameMap[model.Name] = models - nameList = append(nameList, model.Name) - } else { - nameMap[model.Name] = append(nameMap[model.Name], model) - } - } - - mapInterface := make(map[string]interface{}) - mapInterface["nameList"] = nameList - mapInterface["nameMap"] = nameMap - ctx.JSON(http.StatusOK, mapInterface) -} - -func QueryModelFileForPredict(ctx *context.Context) { - id := ctx.Query("ID") - model, err := models.QueryModelById(id) - if err != nil { - log.Error("no such model!", err.Error()) - ctx.ServerError("no such model:", err) - return - } - prefix := model.Path[len(setting.Bucket)+2:] - fileinfos, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, prefix) - ctx.JSON(http.StatusOK, fileinfos) -} diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 276780103..7d3e2701d 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1728,10 +1728,9 @@ func InferenceJobIndex(ctx *context.Context) { Page: page, PageSize: setting.UI.IssuePagingNum, }, - RepoID: repo.ID, - Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), - IsLatestVersion: modelarts.IsLatestVersion, + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobType: string(models.JobTypeInference), }) if err != nil { ctx.ServerError("Cloudbrain", err) @@ -1890,24 +1889,15 @@ func InferenceJobShow(ctx *context.Context) { if page <= 0 { page = 1 } - VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ - ListOptions: models.ListOptions{ - Page: page, - PageSize: setting.UI.IssuePagingNum, - }, - RepoID: repo.ID, - Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), - JobID: jobID, - }) + task, err := models.GetCloudbrainByJobID(jobID) if err != nil { - log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) + log.Error("GetInferenceTask(%s) failed:%v", jobID, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobShow, nil) return } //设置权限 - canNewJob, err := canUserCreateTrainJobVersion(ctx, VersionListTasks[0].UserID) + canNewJob, err := canUserCreateTrainJobVersion(ctx, task.UserID) if err != nil { ctx.ServerError("canNewJob failed", err) return @@ -1915,90 +1905,79 @@ func InferenceJobShow(ctx *context.Context) { ctx.Data["canNewJob"] = canNewJob //将运行参数转化为epoch_size = 3, device_target = Ascend的格式 - for i, _ := range VersionListTasks { - - var parameters models.Parameters - - err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), ¶meters) - if err != nil { - log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err) - trainJobNewDataPrepare(ctx) - return - } + var parameters models.Parameters + err = json.Unmarshal([]byte(task.Parameters), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) + trainJobNewDataPrepare(ctx) + return + } - if len(parameters.Parameter) > 0 { - paramTemp := "" - for _, Parameter := range parameters.Parameter { - param := Parameter.Label + " = " + Parameter.Value + "; " - paramTemp = paramTemp + param - } - VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2] - } else { - VersionListTasks[i].Parameters = "" + if len(parameters.Parameter) > 0 { + paramTemp := "" + for _, Parameter := range parameters.Parameter { + param := Parameter.Label + " = " + Parameter.Value + "; " + paramTemp = paramTemp + param } + task.Parameters = paramTemp[:len(paramTemp)-2] + } else { + task.Parameters = "" } pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) pager.SetDefaultParams(ctx) ctx.Data["Page"] = pager ctx.Data["jobID"] = jobID - ctx.Data["jobName"] = VersionListTasks[0].JobName - ctx.Data["version_list_task"] = VersionListTasks - ctx.Data["version_list_count"] = VersionListCount - ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) + ctx.Data["jobName"] = task.JobName + ctx.Data["task"] = task + ctx.HTML(http.StatusOK, tplModelArtsInferenceJobShow) } func InferenceJobStop(ctx *context.Context) { var jobID = ctx.Params(":jobid") task, err := models.GetCloudbrainByJobID(jobID) if err != nil { log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobIndex, nil) return } _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { - log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + log.Error("StopInferenceJob(%s) failed:%v", task.JobName, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobIndex, nil) return } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") } func InferenceJobDel(ctx *context.Context) { var jobID = ctx.Params(":jobid") repo := ctx.Repo.Repository - VersionListTasks, _, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ - RepoID: repo.ID, - Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), - JobID: jobID, - }) + task, err := models.GetCloudbrainByJobID(jobID) if err != nil { - ctx.ServerError("get VersionListTasks failed", err) + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobIndex, nil) return } //删除modelarts上的任务记录 _, err = modelarts.DelTrainJob(jobID) if err != nil { - log.Error("DelTrainJob(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + log.Error("DelInferenceJob(%s) failed:%v", jobID, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobIndex, nil) return } //删除数据库Cloudbrain表的记录 - for _, task := range VersionListTasks { - err = models.DeleteJob(&task.Cloudbrain) - if err != nil { - ctx.ServerError("DeleteJob failed", err) - return - } + err = models.DeleteJob(&task.Cloudbrain) + if err != nil { + ctx.ServerError("DeleteJob failed", err) + return } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") } func ResultDownload(ctx *context.Context) { @@ -2007,17 +1986,14 @@ func ResultDownload(ctx *context.Context) { ) var jobID = ctx.Params(":jobid") - versionName := ctx.Query("version_name") parentDir := ctx.Query("parent_dir") fileName := ctx.Query("file_name") - log.Info("DownloadSingleModelFile start.") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + log.Info("DownloadResult start.") + task, err := models.GetCloudbrainByJobID(jobID) if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) - return + ctx.Data["error"] = err.Error() } - - path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName, parentDir, fileName), "/") + path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, modelarts.ResultPath, parentDir, fileName), "/") log.Info("Download path is:%s", path) url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) diff --git a/routers/routes/routes.go b/routers/routes/routes.go index a89704234..55ee2b750 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -988,8 +988,6 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/show_model_child_api", repo.ShowOneVersionOtherModel) m.Get("/query_train_job", reqRepoCloudBrainReader, repo.QueryTrainJobList) m.Get("/query_train_job_version", reqRepoCloudBrainReader, repo.QueryTrainJobVersionList) - m.Get("/query_model_for_predict", reqRepoCloudBrainReader, repo.QueryModelListForPredict) - m.Get("/query_modelfile_for_predict", reqRepoCloudBrainReader, repo.QueryModelFileForPredict) m.Group("/:ID", func() { m.Get("", repo.ShowSingleModel) m.Get("/downloadsingle", repo.DownloadSingleModelFile) From 2dcbcec4fe690c4e6ff4019a04cef043a69d5700 Mon Sep 17 00:00:00 2001 From: liuzx Date: Thu, 23 Dec 2021 17:25:28 +0800 Subject: [PATCH 007/108] update --- routers/repo/modelarts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 7d3e2701d..d9cd43c4b 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1971,7 +1971,7 @@ func InferenceJobDel(ctx *context.Context) { } //删除数据库Cloudbrain表的记录 - err = models.DeleteJob(&task.Cloudbrain) + err = models.DeleteJob(task) if err != nil { ctx.ServerError("DeleteJob failed", err) return From 145e496988a615ceb5cec1083e1f40947df2d09e Mon Sep 17 00:00:00 2001 From: liuzx Date: Thu, 23 Dec 2021 17:44:55 +0800 Subject: [PATCH 008/108] update --- routers/repo/modelarts.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index d9cd43c4b..b1fa23432 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1800,10 +1800,8 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error { ctx.ServerError("json.Unmarshal failed:", err) return err } - ctx.Data["flavor_infos"] = flavorInfos.Info - resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath - ctx.Data["result_url"] = resultObsPath + ctx.Data["flavor_infos"] = flavorInfos.Info ctx.Data["params"] = "" ctx.Data["branchName"] = ctx.Repo.BranchName @@ -1924,9 +1922,6 @@ func InferenceJobShow(ctx *context.Context) { task.Parameters = "" } - pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) - pager.SetDefaultParams(ctx) - ctx.Data["Page"] = pager ctx.Data["jobID"] = jobID ctx.Data["jobName"] = task.JobName ctx.Data["task"] = task From b4680119f2604c55d41963f8ff89d73e8aac58d4 Mon Sep 17 00:00:00 2001 From: liuzx Date: Fri, 24 Dec 2021 09:43:13 +0800 Subject: [PATCH 009/108] update --- routers/repo/modelarts.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index b1fa23432..ac8c6b401 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1882,7 +1882,6 @@ func InferenceJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true var jobID = ctx.Params(":jobid") - repo := ctx.Repo.Repository page := ctx.QueryInt("page") if page <= 0 { page = 1 @@ -1948,7 +1947,6 @@ func InferenceJobStop(ctx *context.Context) { func InferenceJobDel(ctx *context.Context) { var jobID = ctx.Params(":jobid") - repo := ctx.Repo.Repository task, err := models.GetCloudbrainByJobID(jobID) if err != nil { From 983b6f6b405682aa16aa526321fcbf1a80964e6c Mon Sep 17 00:00:00 2001 From: liuzx Date: Fri, 24 Dec 2021 09:58:10 +0800 Subject: [PATCH 010/108] =?UTF-8?q?=E6=81=A2=E5=A4=8D=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/ai_model_manage.go | 49 +++++++++++++++++++++++++++++++++++++++++ routers/routes/routes.go | 2 ++ 2 files changed, 51 insertions(+) diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 669bdf9fa..4d25fd6a5 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -586,3 +586,52 @@ func ModifyModelInfo(ctx *context.Context) { } } +func QueryModelListForPredict(ctx *context.Context) { + repoId := ctx.Repo.Repository.ID + modelResult, count, err := models.QueryModel(&models.AiModelQueryOptions{ + ListOptions: models.ListOptions{ + Page: -1, + PageSize: -1, + }, + RepoID: repoId, + Type: -1, + New: -1, + }) + if err != nil { + ctx.ServerError("Cloudbrain", err) + return + } + log.Info("query return count=" + fmt.Sprint(count)) + + nameList := make([]string, 0) + + nameMap := make(map[string][]*models.AiModelManage) + for _, model := range modelResult { + if _, value := nameMap[model.Name]; !value { + models := make([]*models.AiModelManage, 0) + models = append(models, model) + nameMap[model.Name] = models + nameList = append(nameList, model.Name) + } else { + nameMap[model.Name] = append(nameMap[model.Name], model) + } + } + + mapInterface := make(map[string]interface{}) + mapInterface["nameList"] = nameList + mapInterface["nameMap"] = nameMap + ctx.JSON(http.StatusOK, mapInterface) +} + +func QueryModelFileForPredict(ctx *context.Context) { + id := ctx.Query("ID") + model, err := models.QueryModelById(id) + if err != nil { + log.Error("no such model!", err.Error()) + ctx.ServerError("no such model:", err) + return + } + prefix := model.Path[len(setting.Bucket)+2:] + fileinfos, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, prefix) + ctx.JSON(http.StatusOK, fileinfos) +} diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 55ee2b750..a89704234 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -988,6 +988,8 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/show_model_child_api", repo.ShowOneVersionOtherModel) m.Get("/query_train_job", reqRepoCloudBrainReader, repo.QueryTrainJobList) m.Get("/query_train_job_version", reqRepoCloudBrainReader, repo.QueryTrainJobVersionList) + m.Get("/query_model_for_predict", reqRepoCloudBrainReader, repo.QueryModelListForPredict) + m.Get("/query_modelfile_for_predict", reqRepoCloudBrainReader, repo.QueryModelFileForPredict) m.Group("/:ID", func() { m.Get("", repo.ShowSingleModel) m.Get("/downloadsingle", repo.DownloadSingleModelFile) From 6eedb70cca7801da4f31c08d8c93b607e46952a7 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Mon, 27 Dec 2021 09:35:20 +0800 Subject: [PATCH 011/108] fix issue --- options/locale/locale_en-US.ini | 5 + options/locale/locale_zh-CN.ini | 4 + templates/repo/debugjob/index.tmpl | 1 + templates/repo/modelarts/inferencejob/index.tmpl | 210 +++++++++++++++- templates/repo/modelarts/inferencejob/new.tmpl | 306 ++++++++++++++++++++++- templates/repo/modelarts/trainjob/index.tmpl | 1 + 6 files changed, 525 insertions(+), 2 deletions(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index b58021ea2..e48f0d5ff 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -887,6 +887,11 @@ modelarts.train_job_para_admin=train_job_para_admin modelarts.train_job_para.edit=train_job_para.edit modelarts.train_job_para.connfirm=train_job_para.connfirm + +modelarts.infer_job = Inference Job +modelarts.infer_job.model_version = Model/Version +modelarts.infer_job.select_model = Select Model + model.manage.import_new_model=Import New Model model.manage.create_error=Equal Name and Version has existed. model.manage.model_name = Model Name diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 7c821824d..c56578e7a 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -897,6 +897,10 @@ modelarts.train_job_para_admin=任务参数管理 modelarts.train_job_para.edit=编辑 modelarts.train_job_para.connfirm=确定 +modelarts.infer_job = 推理任务 +modelarts.infer_job.model_version = 模型/版本 +modelarts.infer_job.select_model = 选择模型 + model.manage.import_new_model=导入新模型 model.manage.create_error=相同的名称和版本的模型已经存在。 model.manage.model_name = 模型名称 diff --git a/templates/repo/debugjob/index.tmpl b/templates/repo/debugjob/index.tmpl index 7aa4bfa25..18aaadd34 100755 --- a/templates/repo/debugjob/index.tmpl +++ b/templates/repo/debugjob/index.tmpl @@ -220,6 +220,7 @@
diff --git a/templates/repo/modelarts/inferencejob/index.tmpl b/templates/repo/modelarts/inferencejob/index.tmpl index 30d74d258..ec8e0aad6 100644 --- a/templates/repo/modelarts/inferencejob/index.tmpl +++ b/templates/repo/modelarts/inferencejob/index.tmpl @@ -1 +1,209 @@ -test \ No newline at end of file + +{{template "base/head" .}} + + + + +
+
+
+
+
+
+
+
+
+ + +
+ +
+ {{template "repo/header" .}} + +
+ {{template "base/alert" .}} + + {{if eq 0 (len .Tasks)}} +
+
+
未创建过推理任务
+
+ {{if $.RepoIsEmpty}} +
代码版本:您还没有初始化代码仓库,请先创建代码版本;
+ {{end}} +
模型文件:您还没有模型文件,请先通过训练任务产生并导出模型;
+
数据集:云脑1提供 CPU / GPU 资源,云脑2提供 Ascend NPU 资源,调试使用的数据集也需要上传到对应的环境;
+
使用说明:可以参考启智AI协作平台小白训练营课程。
+
+
+ {{else}} + +
+
+
+ +
+ + +
+
+
+ {{$.i18n.Tr "repo.cloudbrain_task"}} +
+
+ {{$.i18n.Tr "repo.modelarts.infer_job.model_version"}} +
+
+ {{$.i18n.Tr "repo.modelarts.status"}} +
+
+ {{$.i18n.Tr "repo.modelarts.createtime"}} +
+
+ {{$.i18n.Tr "repo.cloudbrain_status_runtime"}} +
+
+ {{$.i18n.Tr "repo.modelarts.computing_resources"}} +
+
+ {{$.i18n.Tr "repo.cloudbrain_creator"}} +
+
+ {{$.i18n.Tr "repo.cloudbrain_operate"}} +
+
+
+ + {{range .Tasks}} +
+
+ + + + +
+ {{.VersionCount}} +
+ +
+ + {{.Status}} + +
+ +
+ {{TimeSinceUnix .Cloudbrain.CreatedUnix $.Lang}} +
+ +
+ {{.TrainJobDuration}} +
+ +
+ {{.ComputeResource}} +
+ +
+ {{if .User.Name}} + + {{else}} + + {{end}} +
+ +
+ +
+ {{$.CsrfTokenHtml}} + {{if .CanDel}} + + {{$.i18n.Tr "repo.stop"}} + + {{else}} + + {{$.i18n.Tr "repo.stop"}} + + {{end}} + +
+ +
+ {{$.CsrfTokenHtml}} + {{if .CanDel}} + + {{$.i18n.Tr "repo.delete"}} + + {{else}} + + {{$.i18n.Tr "repo.delete"}} + + {{end}} +
+
+
+
+ {{end}} {{template "base/paginate" .}} +
+ +
+
+
+ {{end}} + +
+ +
+
+ + + + +
+ +
+ + +{{template "base/footer" .}} diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl index 30d74d258..33019fcb4 100644 --- a/templates/repo/modelarts/inferencejob/new.tmpl +++ b/templates/repo/modelarts/inferencejob/new.tmpl @@ -1 +1,305 @@ -test \ No newline at end of file +{{template "base/head" .}} + + +
+
+
+
+
+
+
+
+
+
+ {{template "repo/header" .}} +
+ {{template "base/alert" .}} +

+ {{.i18n.Tr "repo.modelarts.train_job.new"}} +

+
+ +
+ {{.CsrfTokenHtml}} + + + +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+
+ + +
+ +
+ + +
+
+ +

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

+ +
+
+ + + +
+
+ +
+
+ +
+
+
+
+ + +
+
+ +
+
+ +
+ + +
+
+ + + 数据集位置存储在环境变量data_url中,训练输出路径存储在环境变量train_url中。 +
+ +
+ + {{if .bootFile}} + + {{else}} + + {{end}} + + + + 查看样例 +
+ + +
+ + {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} + +
+ {{if ne 0 (len .params)}} + {{range $k ,$v := .params}} +
+
+ +
+
+ +
+ + + + +
+ {{end}} + {{end}} +
+
+ + + + +
+ + +
+
+ + +
+ +
+
+ +
+ + {{.i18n.Tr "repo.cloudbrain.cancel"}} +
+ + + +
+
+
+
+{{template "base/footer" .}} + + \ No newline at end of file diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index 52201e79d..b4715a055 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -34,6 +34,7 @@
From 17041f10a1dd0b76849d002c8af72376934ea02a Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Mon, 27 Dec 2021 17:04:40 +0800 Subject: [PATCH 012/108] fix issue --- routers/repo/ai_model_manage.go | 2 +- templates/repo/debugjob/index.tmpl | 1 + templates/repo/modelarts/inferencejob/new.tmpl | 212 +++++++++++++++---------- web_src/js/index.js | 2 +- 4 files changed, 134 insertions(+), 83 deletions(-) diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 4d25fd6a5..1f1821576 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -631,7 +631,7 @@ func QueryModelFileForPredict(ctx *context.Context) { ctx.ServerError("no such model:", err) return } - prefix := model.Path[len(setting.Bucket)+2:] + prefix := model.Path[len(setting.Bucket)+1:] fileinfos, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, prefix) ctx.JSON(http.StatusOK, fileinfos) } diff --git a/templates/repo/debugjob/index.tmpl b/templates/repo/debugjob/index.tmpl index 18aaadd34..ce060b152 100755 --- a/templates/repo/debugjob/index.tmpl +++ b/templates/repo/debugjob/index.tmpl @@ -480,6 +480,7 @@ \ No newline at end of file diff --git a/web_src/js/index.js b/web_src/js/index.js index 7d59cc0eb..ae8a1feb8 100755 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -4128,7 +4128,7 @@ function initDropDown() { } //云脑提示 -$('.question.circle.icon').hover(function(){ +$('.question.circle.icon.cloudbrain-question').hover(function(){ $(this).popup('show') $('.ui.popup.mini.top.center').css({"border-color":'rgba(50, 145, 248, 100)',"color":"rgba(3, 102, 214, 100)","border-radius":"5px","border-shadow":"none"}) }); From 999edd0767a7f887656f60170585ec7c3c461c9a Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Mon, 27 Dec 2021 17:10:14 +0800 Subject: [PATCH 013/108] fix issue --- templates/repo/modelarts/inferencejob/new.tmpl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl index c35287722..72b73081a 100644 --- a/templates/repo/modelarts/inferencejob/new.tmpl +++ b/templates/repo/modelarts/inferencejob/new.tmpl @@ -287,7 +287,7 @@ nameMap[value].forEach(element => { let {TrainTaskInfo} = element TrainTaskInfo = JSON.parse(TrainTaskInfo) - html += `
${element.Version}
` + html += `
${element.Version}
` }); $('#model_name_version').append(html) $("#select_model_version").removeClass("loading") @@ -298,9 +298,11 @@ $(function(){ $('#select_model_version').dropdown({ onChange: function(value, text, $selectedItem) { + console.log(value,text,$selectedItem) + const dataID=$selectedItem[0].getAttribute("data-id") $("#select_model_checkpoint").addClass("loading") let html = '' - loadCheckpointList(value).then((res)=>{ + loadCheckpointList(dataID).then((res)=>{ res.forEach(element => { html += `
${element.FileName}
` }) From e0c06352ea1491115f0307dee3ca670a0252943c Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 28 Dec 2021 10:17:12 +0800 Subject: [PATCH 014/108] fix issue --- templates/repo/modelarts/inferencejob/new.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl index 72b73081a..48bcd87b7 100644 --- a/templates/repo/modelarts/inferencejob/new.tmpl +++ b/templates/repo/modelarts/inferencejob/new.tmpl @@ -105,7 +105,7 @@