|
- package repo
-
- import (
- "encoding/json"
- "errors"
- "io"
- "net/http"
- "os"
- "path"
- "strconv"
- "strings"
- "time"
-
- "code.gitea.io/gitea/models"
- "code.gitea.io/gitea/modules/auth"
- "code.gitea.io/gitea/modules/base"
- "code.gitea.io/gitea/modules/context"
- "code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/log"
- "code.gitea.io/gitea/modules/modelarts"
- "code.gitea.io/gitea/modules/obs"
- "code.gitea.io/gitea/modules/setting"
- "code.gitea.io/gitea/modules/storage"
-
- "github.com/unknwon/com"
- )
-
- const (
- // tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
- tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
- tplModelArtsNotebookNew base.TplName = "repo/modelarts/notebook/new"
- tplModelArtsNotebookShow base.TplName = "repo/modelarts/notebook/show"
-
- tplModelArtsIndex base.TplName = "repo/modelarts/index"
- tplModelArtsNew base.TplName = "repo/modelarts/new"
- tplModelArtsShow base.TplName = "repo/modelarts/show"
-
- tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index"
- tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new"
- tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show"
- tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index"
- )
-
- // MustEnableDataset check if repository enable internal cb
- func MustEnableModelArts(ctx *context.Context) {
- if !ctx.Repo.CanRead(models.UnitTypeCloudBrain) {
- ctx.NotFound("MustEnableCloudbrain", nil)
- return
- }
- }
-
- func ModelArtsIndex(ctx *context.Context) {
- MustEnableModelArts(ctx)
- repo := ctx.Repo.Repository
- page := ctx.QueryInt("page")
- if page <= 0 {
- page = 1
- }
-
- ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
- ListOptions: models.ListOptions{
- Page: page,
- PageSize: setting.UI.IssuePagingNum,
- },
- RepoID: repo.ID,
- Type: models.TypeCloudBrainTwo,
- })
- if err != nil {
- ctx.ServerError("Cloudbrain", err)
- return
- }
-
- for i, task := range ciTasks {
- if task.Status == string(models.JobRunning) {
- ciTasks[i].CanDebug = true
- } else {
- ciTasks[i].CanDebug = false
- }
-
- ciTasks[i].CanDel = models.CanDelJob(ctx.IsSigned, ctx.User, task)
- }
-
- pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
- pager.SetDefaultParams(ctx)
- ctx.Data["Page"] = pager
-
- ctx.Data["PageIsCloudBrain"] = true
- ctx.Data["Tasks"] = ciTasks
- ctx.HTML(200, tplModelArtsIndex)
- }
-
- func ModelArtsNew(ctx *context.Context) {
- ctx.Data["PageIsCloudBrain"] = true
-
- t := time.Now()
- var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
- ctx.Data["job_name"] = jobName
-
- attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
- if err != nil {
- ctx.ServerError("GetAllUserAttachments failed:", err)
- return
- }
-
- ctx.Data["attachments"] = attachs
- ctx.Data["dataset_path"] = modelarts.DataSetMountPath
- ctx.Data["env"] = modelarts.NotebookEnv
- ctx.Data["notebook_type"] = modelarts.NotebookType
- if modelarts.FlavorInfos == nil {
- json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
- }
- ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
- ctx.HTML(200, tplModelArtsNew)
- }
-
- func ModelArtsCreate(ctx *context.Context, form auth.CreateModelArtsForm) {
- ctx.Data["PageIsCloudBrain"] = true
- jobName := form.JobName
- uuid := form.Attachment
- description := form.Description
- //repo := ctx.Repo.Repository
- if !jobNamePattern.MatchString(jobName) {
- ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplModelArtsNew, &form)
- return
- }
- err := modelarts.GenerateTask(ctx, jobName, uuid, description)
- if err != nil {
- ctx.RenderWithErr(err.Error(), tplModelArtsNew, &form)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
- }
-
- func ModelArtsShow(ctx *context.Context) {
- ctx.Data["PageIsCloudBrain"] = true
-
- var jobID = ctx.Params(":jobid")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.Data["error"] = err.Error()
- ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
- return
- }
-
- result, err := modelarts.GetJob(jobID)
- if err != nil {
- ctx.Data["error"] = err.Error()
- ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
- return
- }
-
- if result != nil {
- task.Status = result.Status
- err = models.UpdateJob(task)
- if err != nil {
- ctx.Data["error"] = err.Error()
- ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
- return
- }
-
- createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
- result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
- endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
- result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
- result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
- result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
- }
-
- ctx.Data["task"] = task
- ctx.Data["jobID"] = jobID
- ctx.Data["result"] = result
- ctx.HTML(200, tplModelArtsShow)
- }
-
- func ModelArtsDebug(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- _, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.ServerError("GetCloudbrainByJobID failed", err)
- return
- }
-
- result, err := modelarts.GetJob(jobID)
- if err != nil {
- ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
- return
- }
-
- res, err := modelarts.GetJobToken(jobID)
- if err != nil {
- ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
- return
- }
-
- urls := strings.Split(result.Spec.Annotations.Url, "/")
- urlPrefix := result.Spec.Annotations.TargetDomain
- for i, url := range urls {
- if i > 2 {
- urlPrefix += "/" + url
- }
- }
-
- //urlPrefix := result.Spec.Annotations.TargetDomain + "/modelarts/internal/hub/notebook/user/" + task.JobID
- log.Info(urlPrefix)
- debugUrl := urlPrefix + "?token=" + res.Token
- ctx.Redirect(debugUrl)
- }
-
- func ModelArtsStop(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- log.Info(jobID)
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.ServerError("GetCloudbrainByJobID failed", err)
- return
- }
-
- if task.Status != string(models.JobRunning) {
- log.Error("the job(%s) is not running", task.JobName)
- ctx.ServerError("the job is not running", errors.New("the job is not running"))
- return
- }
-
- param := models.NotebookAction{
- Action: models.ActionStop,
- }
- res, err := modelarts.StopJob(jobID, param)
- if err != nil {
- log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
- ctx.ServerError("StopJob failed", err)
- return
- }
-
- task.Status = res.CurrentStatus
- err = models.UpdateJob(task)
- if err != nil {
- ctx.ServerError("UpdateJob failed", err)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
- }
-
- func ModelArtsDel(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.ServerError("GetCloudbrainByJobID failed", err)
- return
- }
-
- if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) {
- log.Error("the job(%s) has not been stopped", task.JobName)
- ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
- return
- }
-
- _, err = modelarts.DelJob(jobID)
- if err != nil {
- log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
- ctx.ServerError("DelJob failed", err)
- return
- }
-
- err = models.DeleteJob(task)
- if err != nil {
- ctx.ServerError("DeleteJob failed", err)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
- }
-
- func NotebookIndex(ctx *context.Context) {
- MustEnableModelArts(ctx)
- repo := ctx.Repo.Repository
- page := ctx.QueryInt("page")
- if page <= 0 {
- page = 1
- }
-
- ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
- ListOptions: models.ListOptions{
- Page: page,
- PageSize: setting.UI.IssuePagingNum,
- },
- RepoID: repo.ID,
- Type: models.TypeCloudBrainNotebook,
- })
- if err != nil {
- ctx.ServerError("Cloudbrain", err)
- return
- }
-
- for i, task := range ciTasks {
- if task.Status == string(models.JobRunning) {
- ciTasks[i].CanDebug = true
- } else {
- ciTasks[i].CanDebug = false
- }
- }
-
- pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
- pager.SetDefaultParams(ctx)
- ctx.Data["Page"] = pager
-
- ctx.Data["PageIsCloudBrain"] = true
- ctx.Data["Tasks"] = ciTasks
- ctx.HTML(200, tplModelArtsNotebookIndex)
- }
-
- func NotebookNew(ctx *context.Context) {
- ctx.Data["PageIsCloudBrain"] = true
-
- t := time.Now()
- var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
- ctx.Data["job_name"] = jobName
-
- attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
- if err != nil {
- ctx.ServerError("GetAllUserAttachments failed:", err)
- return
- }
-
- ctx.Data["attachments"] = attachs
- ctx.Data["dataset_path"] = modelarts.DataSetMountPath
- ctx.Data["env"] = modelarts.NotebookEnv
- ctx.Data["notebook_type"] = modelarts.NotebookType
- if modelarts.FlavorInfos == nil {
- json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
- }
- ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
-
- ctx.HTML(200, tplModelArtsNotebookNew)
- }
-
- func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
- ctx.Data["PageIsNotebook"] = true
- jobName := form.JobName
- uuid := form.Attachment
- description := form.Description
-
- err := modelarts.GenerateTask(ctx, jobName, uuid, description)
- if err != nil {
- ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
- }
-
- func NotebookShow(ctx *context.Context) {
- ctx.Data["PageIsCloudBrain"] = true
-
- var jobID = ctx.Params(":jobid")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.Data["error"] = err.Error()
- ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
- return
- }
-
- result, err := modelarts.GetJob(jobID)
- if err != nil {
- ctx.Data["error"] = err.Error()
- ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
- return
- }
-
- if result != nil {
- task.Status = result.Status
- err = models.UpdateJob(task)
- if err != nil {
- ctx.Data["error"] = err.Error()
- ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
- return
- }
-
- createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
- result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
- endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
- result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
- result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
- result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
- }
-
- ctx.Data["task"] = task
- ctx.Data["jobID"] = jobID
- ctx.Data["result"] = result
- ctx.HTML(200, tplModelArtsNotebookShow)
- }
-
- func NotebookDebug(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- _, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.ServerError("GetCloudbrainByJobID failed", err)
- return
- }
-
- result, err := modelarts.GetJob(jobID)
- if err != nil {
- ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
- return
- }
-
- res, err := modelarts.GetJobToken(jobID)
- if err != nil {
- ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
- return
- }
-
- urls := strings.Split(result.Spec.Annotations.Url, "/")
- urlPrefix := result.Spec.Annotations.TargetDomain
- for i, url := range urls {
- if i > 2 {
- urlPrefix += "/" + url
- }
- }
-
- debugUrl := urlPrefix + "?token=" + res.Token
- ctx.Redirect(debugUrl)
- }
-
- func NotebookStop(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- log.Info(jobID)
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.ServerError("GetCloudbrainByJobID failed", err)
- return
- }
-
- if task.Status != string(models.JobRunning) {
- log.Error("the job(%s) is not running", task.JobName)
- ctx.ServerError("the job is not running", errors.New("the job is not running"))
- return
- }
-
- param := models.NotebookAction{
- Action: models.ActionStop,
- }
- res, err := modelarts.StopJob(jobID, param)
- if err != nil {
- log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
- ctx.ServerError("StopJob failed", err)
- return
- }
-
- task.Status = res.CurrentStatus
- err = models.UpdateJob(task)
- if err != nil {
- ctx.ServerError("UpdateJob failed", err)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
- }
-
- func NotebookDel(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- ctx.ServerError("GetCloudbrainByJobID failed", err)
- return
- }
-
- if task.Status != string(models.JobStopped) {
- log.Error("the job(%s) has not been stopped", task.JobName)
- ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
- return
- }
-
- _, err = modelarts.DelNotebook(jobID)
- if err != nil {
- log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
- ctx.ServerError("DelJob failed", err)
- return
- }
-
- err = models.DeleteJob(task)
- if err != nil {
- ctx.ServerError("DeleteJob failed", err)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
- }
-
- func TrainJobIndex(ctx *context.Context) {
- MustEnableModelArts(ctx)
-
- //can, err := canUserCreateTrainJob(ctx.User.ID)
- //if err != nil {
- // ctx.ServerError("canUserCreateTrainJob", err)
- // return
- //}
- //
- //ctx.Data["CanCreate"] = can
-
- repo := ctx.Repo.Repository
- page := ctx.QueryInt("page")
- if page <= 0 {
- page = 1
- }
-
- tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
- ListOptions: models.ListOptions{
- Page: page,
- PageSize: setting.UI.IssuePagingNum,
- },
- RepoID: repo.ID,
- Type: models.TypeCloudBrainTrainJob,
- })
- if err != nil {
- ctx.ServerError("Cloudbrain", err)
- return
- }
-
- pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
- pager.SetDefaultParams(ctx)
- ctx.Data["Page"] = pager
-
- ctx.Data["PageIsCloudBrain"] = true
- ctx.Data["Tasks"] = tasks
- ctx.HTML(200, tplModelArtsTrainJobIndex)
- }
-
- func TrainJobNew(ctx *context.Context) {
- ctx.Data["PageIsCloudBrain"] = true
-
- //can, err := canUserCreateTrainJob(ctx.User.ID)
- //if err != nil {
- // ctx.ServerError("canUserCreateTrainJob", err)
- // return
- //}
- //
- //if !can {
- // log.Error("the user can not create train-job")
- // ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job"))
- // return
- //}
-
- t := time.Now()
- var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
- ctx.Data["job_name"] = jobName
-
- attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
- if err != nil {
- ctx.ServerError("GetAllUserAttachments failed:", err)
- return
- }
- ctx.Data["attachments"] = attachs
-
- var resourcePools modelarts.ResourcePool
- if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
- ctx.ServerError("json.Unmarshal failed:", err)
- return
- }
- ctx.Data["resource_pools"] = resourcePools.Info
-
- var engines modelarts.Engine
- if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
- ctx.ServerError("json.Unmarshal failed:", err)
- return
- }
- ctx.Data["engines"] = engines.Info
-
- var versionInfos modelarts.VersionInfo
- if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
- ctx.ServerError("json.Unmarshal failed:", err)
- return
- }
- ctx.Data["engine_versions"] = versionInfos.Version
-
- var flavorInfos modelarts.Flavor
- if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
- ctx.ServerError("json.Unmarshal failed:", err)
- return
- }
- ctx.Data["flavor_infos"] = flavorInfos.Info
-
- outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
- ctx.Data["train_url"] = outputObsPath
-
- configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
- if err != nil {
- ctx.ServerError("getConfigList failed:", err)
- return
- }
-
- ctx.Data["config_list"] = configList.ParaConfigs
-
- ctx.HTML(200, tplModelArtsTrainJobNew)
- }
-
- func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
- ctx.Data["PageIsTrainJob"] = true
- jobName := form.JobName
- uuid := form.Attachment
- description := form.Description
- workServerNumber := form.WorkServerNumber
- engineID := form.EngineID
- bootFile := form.BootFile
- flavorCode := form.Flavor
- params := form.Params
- poolID := form.PoolID
- isSaveParam := form.IsSaveParam
- repo := ctx.Repo.Repository
- codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
- codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
- outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
- logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
- dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
-
- //can, err := canUserCreateTrainJob(ctx.User.ID)
- //if err != nil {
- // ctx.ServerError("canUserCreateTrainJob", err)
- // return
- //}
- //
- //if !can {
- // log.Error("the user can not create train-job")
- // ctx.RenderWithErr("the user can not create train-job", tplModelArtsTrainJobNew, &form)
- // return
- //}
-
- //param check
- if err := paramCheckCreateTrainJob(form); err != nil {
- log.Error("paramCheckCreateTrainJob failed:(%v)", err)
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
- return
- }
-
- if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
- log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err)
- ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
- return
- }
-
- //todo: upload code (send to file_server todo this work?)
- if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
- log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
- ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
- return
- }
-
- if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
- log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
- ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
- return
- }
-
- if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
- log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
- ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
- return
- }
-
- //todo: del local code?
-
- var parameters models.Parameters
- param := make([]models.Parameter, 0)
- param = append(param, models.Parameter{
- Label: modelarts.TrainUrl,
- Value: outputObsPath,
- }, models.Parameter{
- Label: modelarts.DataUrl,
- Value: dataPath,
- })
- if len(params) != 0 {
- err := json.Unmarshal([]byte(params), ¶meters)
- if err != nil {
- log.Error("Failed to Unmarshal params: %s (%v)", params, err)
- ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form)
- return
- }
-
- for _, parameter := range parameters.Parameter {
- if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
- param = append(param, models.Parameter{
- Label: parameter.Label,
- Value: parameter.Value,
- })
- }
- }
- }
-
- //save param config
- if isSaveParam == "on" {
- if form.ParameterTemplateName == "" {
- log.Error("ParameterTemplateName is empty")
- ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form)
- return
- }
-
- _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
- ConfigName: form.ParameterTemplateName,
- Description: form.PrameterDescription,
- DataUrl: dataPath,
- AppUrl: codeObsPath,
- BootFileUrl: codeObsPath + bootFile,
- TrainUrl: outputObsPath,
- Flavor: models.Flavor{
- Code: flavorCode,
- },
- WorkServerNum: workServerNumber,
- EngineID: int64(engineID),
- LogUrl: logObsPath,
- PoolID: poolID,
- Parameter: param,
- })
-
- if err != nil {
- log.Error("Failed to CreateTrainJobConfig: %v", err)
- ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form)
- return
- }
- }
-
- req := &modelarts.GenerateTrainJobReq{
- JobName: jobName,
- DataUrl: dataPath,
- Description: description,
- CodeObsPath: codeObsPath,
- BootFile: codeObsPath + bootFile,
- TrainUrl: outputObsPath,
- FlavorCode: flavorCode,
- WorkServerNumber: workServerNumber,
- EngineID: int64(engineID),
- LogUrl: logObsPath,
- PoolID: poolID,
- Uuid: uuid,
- Parameters: param,
- }
-
- err := modelarts.GenerateTrainJob(ctx, req)
- if err != nil {
- log.Error("GenerateTrainJob failed:%v", err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
- return
- }
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
- }
-
// readDir reads the directory named by dirname and returns all of its
// entries in the order the operating system reports them (the list is not
// sorted).
//
// An empty directory yields a nil slice and a nil error. Any error from
// opening or reading the directory is returned as-is.
func readDir(dirname string) ([]os.FileInfo, error) {
	f, err := os.Open(dirname)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Read in batches until io.EOF so that directories holding more entries
	// than one batch are not silently truncated.
	const batchSize = 100
	var list []os.FileInfo
	for {
		batch, err := f.Readdir(batchSize)
		list = append(list, batch...)
		if err == io.EOF {
			return list, nil
		}
		if err != nil {
			return nil, err
		}
	}
}
-
- func uploadCodeToObs(codePath, jobName, parentDir string) error {
- files, err := readDir(codePath)
- if err != nil {
- log.Error("readDir(%s) failed: %s", codePath, err.Error())
- return err
- }
-
- for _, file := range files {
- if file.IsDir() {
- input := &obs.PutObjectInput{}
- input.Bucket = setting.Bucket
- input.Key = parentDir + file.Name() + "/"
- _, err = storage.ObsCli.PutObject(input)
- if err != nil {
- log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
- return err
- }
-
- if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil {
- log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error())
- return err
- }
- } else {
- input := &obs.PutFileInput{}
- input.Bucket = setting.Bucket
- input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name()
- input.SourceFile = codePath + file.Name()
- _, err = storage.ObsCli.PutFile(input)
- if err != nil {
- log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error())
- return err
- }
- }
- }
-
- return nil
- }
-
- func obsMkdir(dir string) error {
- input := &obs.PutObjectInput{}
- input.Bucket = setting.Bucket
- input.Key = dir
- _, err := storage.ObsCli.PutObject(input)
- if err != nil {
- log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
- return err
- }
-
- return nil
- }
-
- func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
- if !strings.HasSuffix(form.BootFile, ".py") {
- log.Error("the boot file(%s) must be a python file", form.BootFile)
- return errors.New("启动文件必须是python文件")
- }
-
- if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 {
- log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber)
- return errors.New("计算节点数必须在1-25之间")
- }
-
- return nil
- }
-
- func TrainJobShow(ctx *context.Context) {
- ctx.Data["PageIsCloudBrain"] = true
-
- var jobID = ctx.Params(":jobid")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- attach, err := models.GetAttachmentByUUID(task.Uuid)
- if err != nil {
- log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
- if err != nil {
- log.Error("GetJob(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- if result != nil {
- result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05")
- if result.Duration != 0 {
- result.TrainJobDuration = addZero(result.Duration/3600000) + ":" + addZero(result.Duration%3600000/60000) + ":" + addZero(result.Duration%60000/1000)
-
- } else {
- result.TrainJobDuration = "00:00:00"
- }
- result.Status = modelarts.TransTrainJobStatus(result.IntStatus)
- err = models.SetTrainJobStatusByJobID(jobID, result.Status, result.Duration, string(result.TrainJobDuration))
- if err != nil {
- ctx.ServerError("UpdateJob failed", err)
- return
- }
-
- result.DatasetName = attach.Name
- }
-
- resultLogFile, resultLog, err := trainJobGetLog(jobID)
- if err != nil {
- log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
- ctx.Data["log"] = resultLog
- ctx.Data["task"] = task
- ctx.Data["jobID"] = jobID
- ctx.Data["result"] = result
- ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
- }
-
// addZero formats t in base 10 and prefixes the text with "0" whenever t is
// smaller than 10, so single-digit values become two characters wide.
func addZero(t int64) (m string) {
	m = strconv.FormatInt(t, 10)
	if t < 10 {
		m = "0" + m
	}
	return m
}
-
- func TrainJobGetLog(ctx *context.Context) {
- ctx.Data["PageIsTrainJob"] = true
-
- var jobID = ctx.Params(":jobid")
- var logFileName = ctx.Query("file_name")
- var baseLine = ctx.Query("base_line")
- var order = ctx.Query("order")
-
- if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
- log.Error("order(%s) check failed", order)
- ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
- return
- }
-
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
- if err != nil {
- log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- ctx.Data["log"] = result
- //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
- }
-
- func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
- return nil, nil, err
- }
-
- resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
- if err != nil {
- log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
- return nil, nil, err
- }
-
- result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
- if err != nil {
- log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
- return nil, nil, err
- }
-
- return resultLogFile, result, err
- }
-
- func TrainJobDel(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
- return
- }
-
- _, err = modelarts.DelTrainJob(jobID)
- if err != nil {
- log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
- return
- }
-
- err = models.DeleteJob(task)
- if err != nil {
- ctx.ServerError("DeleteJob failed", err)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
- }
-
- func TrainJobStop(ctx *context.Context) {
- var jobID = ctx.Params(":jobid")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
- return
- }
-
- _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
- if err != nil {
- log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
- return
- }
-
- ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
- }
-
- func canUserCreateTrainJob(uid int64) (bool, error) {
- org, err := models.GetOrgByName(setting.AllowedOrg)
- if err != nil {
- log.Error("get allowed org failed: ", setting.AllowedOrg)
- return false, err
- }
-
- return org.IsOrgMember(uid)
- }
-
- func TrainJobGetConfigList(ctx *context.Context) {
- ctx.Data["PageIsTrainJob"] = true
-
- var jobID = ctx.Params(":jobid")
- var logFileName = ctx.Query("file_name")
- var baseLine = ctx.Query("base_line")
- var order = ctx.Query("order")
-
- if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
- log.Error("order(%s) check failed", order)
- ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
- return
- }
-
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
- if err != nil {
- log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- ctx.Data["log"] = result
- //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
- }
-
- func getConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
- var result models.GetConfigListResult
-
- list, err := modelarts.GetConfigList(perPage, page, sortBy, order, searchContent, configType)
- if err != nil {
- log.Error("GetConfigList failed:", err)
- return &result, err
- }
-
- for _, config := range list.ParaConfigs {
- paraConfig, err := modelarts.GetParaConfig(config.ConfigName, configType)
- if err != nil {
- log.Error("GetParaConfig failed:", err)
- return &result, err
- }
-
- config.Result = paraConfig
- }
-
- return list, nil
- }
-
- func TrainJobShowModels(ctx *context.Context) {
- ctx.Data["PageIsCloudBrain"] = true
-
- jobID := ctx.Params(":jobid")
- parentDir := ctx.Query("parentDir")
- dirArray := strings.Split(parentDir, "/")
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("no such job!", ctx.Data["msgID"])
- ctx.ServerError("no such job:", err)
- return
- }
-
- models, err := storage.GetObsListObject(task.JobName, parentDir)
- if err != nil {
- log.Info("get TrainJobListModel failed:", err)
- ctx.ServerError("GetObsListObject:", err)
- return
- }
-
- ctx.Data["Path"] = dirArray
- ctx.Data["Dirs"] = models
- ctx.Data["task"] = task
- ctx.Data["JobID"] = jobID
- ctx.HTML(200, tplModelArtsTrainJobShowModels)
- }
-
- func TrainJobDownloadModel(ctx *context.Context) {
- parentDir := ctx.Query("parentDir")
- fileName := ctx.Query("fileName")
- jobName := ctx.Query("jobName")
- url, err := storage.GetObsCreateSignedUrl(jobName, parentDir, fileName)
- if err != nil {
- log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
- ctx.ServerError("GetObsCreateSignedUrl", err)
- return
- }
- http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently)
- }
|