package repo

import (
	"encoding/json"
	"errors"
	"io"
	"net/http"
	"os"
	"path"
	"strconv"
	"strings"
	"time"

	"code.gitea.io/gitea/models"
	"code.gitea.io/gitea/modules/auth"
	"code.gitea.io/gitea/modules/base"
	"code.gitea.io/gitea/modules/context"
	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/modelarts"
	"code.gitea.io/gitea/modules/obs"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/storage"

	"github.com/unknwon/com"
)

// Templates rendered by the ModelArts handlers in this file.
const (
	tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
	tplModelArtsNotebookNew   base.TplName = "repo/modelarts/notebook/new"
	tplModelArtsNotebookShow  base.TplName = "repo/modelarts/notebook/show"

	tplModelArtsIndex base.TplName = "repo/modelarts/index"
	tplModelArtsNew   base.TplName = "repo/modelarts/new"
	tplModelArtsShow  base.TplName = "repo/modelarts/show"

	tplModelArtsTrainJobIndex      base.TplName = "repo/modelarts/trainjob/index"
	tplModelArtsTrainJobNew        base.TplName = "repo/modelarts/trainjob/new"
	tplModelArtsTrainJobShow       base.TplName = "repo/modelarts/trainjob/show"
	tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index"
)

// MustEnableModelArts writes a 404 response unless the current user can read
// the repository's cloud-brain unit. Callers must check ctx.Written()
// afterwards to find out whether a response has already been sent.
func MustEnableModelArts(ctx *context.Context) {
	if !ctx.Repo.CanRead(models.UnitTypeCloudBrain) {
		ctx.NotFound("MustEnableCloudbrain", nil)
		return
	}
}

// ModelArtsIndex renders the paginated list of TypeCloudBrainTwo tasks of the
// current repository.
func ModelArtsIndex(ctx *context.Context) {
	MustEnableModelArts(ctx)
	if ctx.Written() {
		// The permission check already answered with 404.
		return
	}

	repo := ctx.Repo.Repository
	page := ctx.QueryInt("page")
	if page <= 0 {
		page = 1
	}

	ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
		ListOptions: models.ListOptions{
			Page:     page,
			PageSize: setting.UI.IssuePagingNum,
		},
		RepoID: repo.ID,
		Type:   models.TypeCloudBrainTwo,
	})
	if err != nil {
		ctx.ServerError("Cloudbrain", err)
		return
	}

	for i, task := range ciTasks {
		// Only running jobs can be debugged.
		ciTasks[i].CanDebug = task.Status == string(models.JobRunning)
		ciTasks[i].CanDel = models.CanDelJob(ctx.IsSigned, ctx.User, task)
	}

	pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
	pager.SetDefaultParams(ctx)
	ctx.Data["Page"] = pager

	ctx.Data["PageIsCloudBrain"] = true
	ctx.Data["Tasks"] = ciTasks
	ctx.HTML(200, tplModelArtsIndex)
}

// ModelArtsNew renders the form for creating a new ModelArts job, pre-filled
// with a generated job name, the user's attachments and the flavor list.
func ModelArtsNew(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	t := time.Now()
	var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
	ctx.Data["job_name"] = jobName

	attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
	if err != nil {
		ctx.ServerError("GetAllUserAttachments failed:", err)
		return
	}

	ctx.Data["attachments"] = attachs
	ctx.Data["dataset_path"] = modelarts.DataSetMountPath
	ctx.Data["env"] = modelarts.NotebookEnv
	ctx.Data["notebook_type"] = modelarts.NotebookType

	// The flavor list is parsed from the settings on first use.
	if modelarts.FlavorInfos == nil {
		if err := json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos); err != nil {
			// Drop any partially decoded value so the next request retries.
			modelarts.FlavorInfos = nil
			ctx.ServerError("json.Unmarshal failed:", err)
			return
		}
	}
	if modelarts.FlavorInfos == nil {
		ctx.ServerError("FlavorInfos", errors.New("flavor infos are not configured"))
		return
	}
	ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo

	ctx.HTML(200, tplModelArtsNew)
}

// ModelArtsCreate validates the submitted job name, creates the ModelArts
// task and redirects to the task list of the repository.
func ModelArtsCreate(ctx *context.Context, form auth.CreateModelArtsForm) {
	ctx.Data["PageIsCloudBrain"] = true
	jobName := form.JobName
	uuid := form.Attachment
	description := form.Description
	//repo := ctx.Repo.Repository

	if !jobNamePattern.MatchString(jobName) {
		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplModelArtsNew, &form)
		return
	}

	err := modelarts.GenerateTask(ctx, jobName, uuid, description)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsNew, &form)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
}

// ModelArtsShow fetches the remote state of a job, stores the refreshed
// status in the database and renders the job detail page.
func ModelArtsShow(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	var jobID = ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.Data["error"] = err.Error()
		ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
		return
	}

	result, err := modelarts.GetJob(jobID)
	if err != nil {
		ctx.Data["error"] = err.Error()
		ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
		return
	}

	if result != nil {
		task.Status = result.Status
		err = models.UpdateJob(task)
		if err != nil {
			ctx.Data["error"] = err.Error()
			ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
			return
		}

		// The timestamps are divided by 1000 before being used as Unix
		// seconds; a parse failure deliberately falls back to 0.
		createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
		result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")

		endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
		result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")

		result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
		result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
	}

	ctx.Data["task"] = task
	ctx.Data["jobID"] = jobID
	ctx.Data["result"] = result
	ctx.HTML(200, tplModelArtsShow)
}

// ModelArtsDebug redirects the browser to the debug URL of a job, built from
// the job's target domain, the path part of its annotated URL and a token.
func ModelArtsDebug(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	_, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.ServerError("GetCloudbrainByJobID failed", err)
		return
	}

	result, err := modelarts.GetJob(jobID)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
		return
	}

	res, err := modelarts.GetJobToken(jobID)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
		return
	}

	// Keep everything after the third "/"-separated element of the annotated
	// URL and append it to the target domain.
	urls := strings.Split(result.Spec.Annotations.Url, "/")
	urlPrefix := result.Spec.Annotations.TargetDomain
	for i, segment := range urls {
		if i > 2 {
			urlPrefix += "/" + segment
		}
	}

	//urlPrefix := result.Spec.Annotations.TargetDomain + "/modelarts/internal/hub/notebook/user/" + task.JobID
	log.Info(urlPrefix)
	debugURL := urlPrefix + "?token=" + res.Token
	ctx.Redirect(debugURL)
}

// ModelArtsStop stops a running job, stores the status reported by the stop
// call and redirects to the task list.
func ModelArtsStop(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	log.Info(jobID)
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.ServerError("GetCloudbrainByJobID failed", err)
		return
	}

	if task.Status != string(models.JobRunning) {
		log.Error("the job(%s) is not running", task.JobName)
		ctx.ServerError("the job is not running", errors.New("the job is not running"))
		return
	}

	param := models.NotebookAction{
		Action: models.ActionStop,
	}
	res, err := modelarts.StopJob(jobID, param)
	if err != nil {
		log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
		ctx.ServerError("StopJob failed", err)
		return
	}

	task.Status = res.CurrentStatus
	err = models.UpdateJob(task)
	if err != nil {
		ctx.ServerError("UpdateJob failed", err)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
}

// ModelArtsDel deletes a job remotely and locally. Only jobs whose status is
// create-failed, start-failed or stopped may be deleted.
func ModelArtsDel(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.ServerError("GetCloudbrainByJobID failed", err)
		return
	}

	if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) {
		log.Error("the job(%s) has not been stopped", task.JobName)
		ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
		return
	}

	_, err = modelarts.DelJob(jobID)
	if err != nil {
		log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
		ctx.ServerError("DelJob failed", err)
		return
	}

	err = models.DeleteJob(task)
	if err != nil {
		ctx.ServerError("DeleteJob failed", err)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
}

// NotebookIndex renders the paginated list of notebook tasks of the current
// repository.
func NotebookIndex(ctx *context.Context) {
	MustEnableModelArts(ctx)
	if ctx.Written() {
		// The permission check already answered with 404.
		return
	}

	repo := ctx.Repo.Repository
	page := ctx.QueryInt("page")
	if page <= 0 {
		page = 1
	}

	ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
		ListOptions: models.ListOptions{
			Page:     page,
			PageSize: setting.UI.IssuePagingNum,
		},
		RepoID: repo.ID,
		Type:   models.TypeCloudBrainNotebook,
	})
	if err != nil {
		ctx.ServerError("Cloudbrain", err)
		return
	}

	for i, task := range ciTasks {
		// Only running jobs can be debugged.
		ciTasks[i].CanDebug = task.Status == string(models.JobRunning)
	}

	pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
	pager.SetDefaultParams(ctx)
	ctx.Data["Page"] = pager

	ctx.Data["PageIsCloudBrain"] = true
	ctx.Data["Tasks"] = ciTasks
	ctx.HTML(200, tplModelArtsNotebookIndex)
}

// NotebookNew renders the form for creating a new notebook, pre-filled with a
// generated job name, the user's attachments and the flavor list.
func NotebookNew(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	t := time.Now()
	var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
	ctx.Data["job_name"] = jobName

	attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
	if err != nil {
		ctx.ServerError("GetAllUserAttachments failed:", err)
		return
	}

	ctx.Data["attachments"] = attachs
	ctx.Data["dataset_path"] = modelarts.DataSetMountPath
	ctx.Data["env"] = modelarts.NotebookEnv
	ctx.Data["notebook_type"] = modelarts.NotebookType

	// The flavor list is parsed from the settings on first use.
	if modelarts.FlavorInfos == nil {
		if err := json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos); err != nil {
			// Drop any partially decoded value so the next request retries.
			modelarts.FlavorInfos = nil
			ctx.ServerError("json.Unmarshal failed:", err)
			return
		}
	}
	if modelarts.FlavorInfos == nil {
		ctx.ServerError("FlavorInfos", errors.New("flavor infos are not configured"))
		return
	}
	ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo

	ctx.HTML(200, tplModelArtsNotebookNew)
}

// NotebookCreate creates a notebook task from the submitted form and
// redirects to the notebook list of the repository.
func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
	ctx.Data["PageIsNotebook"] = true
	jobName := form.JobName
	uuid := form.Attachment
	description := form.Description

	err := modelarts.GenerateTask(ctx, jobName, uuid, description)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
}

// NotebookShow fetches the remote state of a notebook, stores the refreshed
// status in the database and renders the notebook detail page.
func NotebookShow(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	var jobID = ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.Data["error"] = err.Error()
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
		return
	}

	result, err := modelarts.GetJob(jobID)
	if err != nil {
		ctx.Data["error"] = err.Error()
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
		return
	}

	if result != nil {
		task.Status = result.Status
		err = models.UpdateJob(task)
		if err != nil {
			ctx.Data["error"] = err.Error()
			ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
			return
		}

		// The timestamps are divided by 1000 before being used as Unix
		// seconds; a parse failure deliberately falls back to 0.
		createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
		result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")

		endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
		result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")

		result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
		result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
	}

	ctx.Data["task"] = task
	ctx.Data["jobID"] = jobID
	ctx.Data["result"] = result
	ctx.HTML(200, tplModelArtsNotebookShow)
}

// NotebookDebug redirects the browser to the debug URL of a notebook, built
// from the job's target domain, the path part of its annotated URL and a
// token.
func NotebookDebug(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	_, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.ServerError("GetCloudbrainByJobID failed", err)
		return
	}

	result, err := modelarts.GetJob(jobID)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
		return
	}

	res, err := modelarts.GetJobToken(jobID)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
		return
	}

	// Keep everything after the third "/"-separated element of the annotated
	// URL and append it to the target domain.
	urls := strings.Split(result.Spec.Annotations.Url, "/")
	urlPrefix := result.Spec.Annotations.TargetDomain
	for i, segment := range urls {
		if i > 2 {
			urlPrefix += "/" + segment
		}
	}

	debugURL := urlPrefix + "?token=" + res.Token
	ctx.Redirect(debugURL)
}

// NotebookStop stops a running notebook, stores the status reported by the
// stop call and redirects to the notebook list.
func NotebookStop(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	log.Info(jobID)
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.ServerError("GetCloudbrainByJobID failed", err)
		return
	}

	if task.Status != string(models.JobRunning) {
		log.Error("the job(%s) is not running", task.JobName)
		ctx.ServerError("the job is not running", errors.New("the job is not running"))
		return
	}

	param := models.NotebookAction{
		Action: models.ActionStop,
	}
	res, err := modelarts.StopJob(jobID, param)
	if err != nil {
		log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
		ctx.ServerError("StopJob failed", err)
		return
	}

	task.Status = res.CurrentStatus
	err = models.UpdateJob(task)
	if err != nil {
		ctx.ServerError("UpdateJob failed", err)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
}

// NotebookDel deletes a stopped notebook remotely and locally, then redirects
// to the notebook list.
func NotebookDel(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		ctx.ServerError("GetCloudbrainByJobID failed", err)
		return
	}

	if task.Status != string(models.JobStopped) {
		log.Error("the job(%s) has not been stopped", task.JobName)
		ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
		return
	}

	_, err = modelarts.DelNotebook(jobID)
	if err != nil {
		log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
		ctx.ServerError("DelJob failed", err)
		return
	}

	err = models.DeleteJob(task)
	if err != nil {
		ctx.ServerError("DeleteJob failed", err)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
}

// TrainJobIndex renders the paginated list of train jobs of the current
// repository.
func TrainJobIndex(ctx *context.Context) {
	MustEnableModelArts(ctx)
	if ctx.Written() {
		// The permission check already answered with 404.
		return
	}

	//can, err := canUserCreateTrainJob(ctx.User.ID)
	//if err != nil {
	//	ctx.ServerError("canUserCreateTrainJob", err)
	//	return
	//}
	//
	//ctx.Data["CanCreate"] = can

	repo := ctx.Repo.Repository
	page := ctx.QueryInt("page")
	if page <= 0 {
		page = 1
	}

	tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
		ListOptions: models.ListOptions{
			Page:     page,
			PageSize: setting.UI.IssuePagingNum,
		},
		RepoID: repo.ID,
		Type:   models.TypeCloudBrainTrainJob,
	})
	if err != nil {
		ctx.ServerError("Cloudbrain", err)
		return
	}

	pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
	pager.SetDefaultParams(ctx)
	ctx.Data["Page"] = pager

	ctx.Data["PageIsCloudBrain"] = true
	ctx.Data["Tasks"] = tasks
	ctx.HTML(200, tplModelArtsTrainJobIndex)
}

// TrainJobNew renders the form for creating a train job. It exposes the
// user's attachments, the resource pools, engines, engine versions and
// flavors parsed from the settings, and the saved custom parameter configs.
func TrainJobNew(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	//can, err := canUserCreateTrainJob(ctx.User.ID)
	//if err != nil {
	//	ctx.ServerError("canUserCreateTrainJob", err)
	//	return
	//}
	//
	//if !can {
	//	log.Error("the user can not create train-job")
	//	ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job"))
	//	return
	//}

	t := time.Now()
	var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
	ctx.Data["job_name"] = jobName

	attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
	if err != nil {
		ctx.ServerError("GetAllUserAttachments failed:", err)
		return
	}
	ctx.Data["attachments"] = attachs

	var resourcePools modelarts.ResourcePool
	if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
		ctx.ServerError("json.Unmarshal failed:", err)
		return
	}
	ctx.Data["resource_pools"] = resourcePools.Info

	var engines modelarts.Engine
	if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
		ctx.ServerError("json.Unmarshal failed:", err)
		return
	}
	ctx.Data["engines"] = engines.Info

	var versionInfos modelarts.VersionInfo
	if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
		ctx.ServerError("json.Unmarshal failed:", err)
		return
	}
	ctx.Data["engine_versions"] = versionInfos.Version

	var flavorInfos modelarts.Flavor
	if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
		ctx.ServerError("json.Unmarshal failed:", err)
		return
	}
	ctx.Data["flavor_infos"] = flavorInfos.Info

	outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
	ctx.Data["train_url"] = outputObsPath

	configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
	if err != nil {
		ctx.ServerError("getConfigList failed:", err)
		return
	}
	ctx.Data["config_list"] = configList.ParaConfigs

	ctx.HTML(200, tplModelArtsTrainJobNew)
}

// TrainJobCreate validates the form, clones the repository, uploads the code
// to OBS, optionally saves the parameters as a config and finally creates the
// train job. On success it redirects to the train-job list.
func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
	ctx.Data["PageIsTrainJob"] = true
	jobName := form.JobName
	uuid := form.Attachment
	description := form.Description
	workServerNumber := form.WorkServerNumber
	engineID := form.EngineID
	bootFile := form.BootFile
	flavorCode := form.Flavor
	params := form.Params
	poolID := form.PoolID
	isSaveParam := form.IsSaveParam
	repo := ctx.Repo.Repository

	//can, err := canUserCreateTrainJob(ctx.User.ID)
	//if err != nil {
	//	ctx.ServerError("canUserCreateTrainJob", err)
	//	return
	//}
	//
	//if !can {
	//	log.Error("the user can not create train-job")
	//	ctx.RenderWithErr("the user can not create train-job", tplModelArtsTrainJobNew, &form)
	//	return
	//}

	//param check
	if err := paramCheckCreateTrainJob(form); err != nil {
		log.Error("paramCheckCreateTrainJob failed:(%v)", err)
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
		return
	}

	// The first two characters of the uuid are used as directory names
	// below, so a shorter value would make the slicing panic.
	if len(uuid) < 2 {
		log.Error("invalid attachment uuid(%s)", uuid)
		ctx.RenderWithErr("invalid attachment uuid", tplModelArtsTrainJobNew, &form)
		return
	}

	codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
	codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
	outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
	logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
	dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"

	if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
		log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err)
		ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
		return
	}

	//todo: upload code (send to file_server todo this work?)
	if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
		log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
		ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
		return
	}

	if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
		log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
		ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
		return
	}

	if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
		log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
		ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
		return
	}

	//todo: del local code?

	// The train and data URLs are always set by the server; user-supplied
	// parameters with the same labels are ignored below.
	var parameters models.Parameters
	param := make([]models.Parameter, 0)
	param = append(param, models.Parameter{
		Label: modelarts.TrainUrl,
		Value: outputObsPath,
	}, models.Parameter{
		Label: modelarts.DataUrl,
		Value: dataPath,
	})
	if len(params) != 0 {
		err := json.Unmarshal([]byte(params), &parameters)
		if err != nil {
			log.Error("Failed to Unmarshal params: %s (%v)", params, err)
			ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form)
			return
		}

		for _, parameter := range parameters.Parameter {
			if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
				param = append(param, models.Parameter{
					Label: parameter.Label,
					Value: parameter.Value,
				})
			}
		}
	}

	//save param config
	if isSaveParam == "on" {
		if form.ParameterTemplateName == "" {
			log.Error("ParameterTemplateName is empty")
			ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form)
			return
		}

		_, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
			ConfigName:  form.ParameterTemplateName,
			Description: form.PrameterDescription,
			DataUrl:     dataPath,
			AppUrl:      codeObsPath,
			BootFileUrl: codeObsPath + bootFile,
			TrainUrl:    outputObsPath,
			Flavor: models.Flavor{
				Code: flavorCode,
			},
			WorkServerNum: workServerNumber,
			EngineID:      int64(engineID),
			LogUrl:        logObsPath,
			PoolID:        poolID,
			Parameter:     param,
		})

		if err != nil {
			log.Error("Failed to CreateTrainJobConfig: %v", err)
			ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form)
			return
		}
	}

	req := &modelarts.GenerateTrainJobReq{
		JobName:          jobName,
		DataUrl:          dataPath,
		Description:      description,
		CodeObsPath:      codeObsPath,
		BootFile:         codeObsPath + bootFile,
		TrainUrl:         outputObsPath,
		FlavorCode:       flavorCode,
		WorkServerNumber: workServerNumber,
		EngineID:         int64(engineID),
		LogUrl:           logObsPath,
		PoolID:           poolID,
		Uuid:             uuid,
		Parameters:       param,
	}

	err := modelarts.GenerateTrainJob(ctx, req)
	if err != nil {
		log.Error("GenerateTrainJob failed:%v", err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
		return
	}
	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

// readDir reads the directory named by dirname and returns all of its
// entries in directory order. An empty directory yields a nil list and a nil
// error.
func readDir(dirname string) ([]os.FileInfo, error) {
	f, err := os.Open(dirname)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Read in batches until io.EOF so that directories holding more than one
	// batch of entries are not silently truncated.
	var list []os.FileInfo
	for {
		batch, err := f.Readdir(100)
		list = append(list, batch...)
		if err == io.EOF {
			//todo: can not upload empty folder
			break
		}
		if err != nil {
			return nil, err
		}
	}

	//sort.Slice(list, func(i, j int) bool { return list[i].Name() < list[j].Name() })
	return list, nil
}

// uploadCodeToObs recursively uploads the content of codePath to OBS.
// codePath is concatenated directly with entry names, so it must end with a
// path separator. parentDir is the key prefix relative to the job's code
// directory and is "" for the top-level call.
func uploadCodeToObs(codePath, jobName, parentDir string) error {
	files, err := readDir(codePath)
	if err != nil {
		log.Error("readDir(%s) failed: %s", codePath, err.Error())
		return err
	}

	for _, file := range files {
		if file.IsDir() {
			// NOTE(review): the directory marker key is not prefixed with
			// setting.CodePathPrefix + jobName + "/code/" the way file keys
			// are below — confirm whether that is intended.
			input := &obs.PutObjectInput{}
			input.Bucket = setting.Bucket
			input.Key = parentDir + file.Name() + "/"
			_, err = storage.ObsCli.PutObject(input)
			if err != nil {
				log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
				return err
			}

			if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil {
				log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error())
				return err
			}
		} else {
			input := &obs.PutFileInput{}
			input.Bucket = setting.Bucket
			input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name()
			input.SourceFile = codePath + file.Name()
			_, err = storage.ObsCli.PutFile(input)
			if err != nil {
				log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error())
				return err
			}
		}
	}

	return nil
}

// obsMkdir creates an object with key dir in the configured bucket by putting
// an object without a body.
func obsMkdir(dir string) error {
	input := &obs.PutObjectInput{}
	input.Bucket = setting.Bucket
	input.Key = dir
	_, err := storage.ObsCli.PutObject(input)
	if err != nil {
		log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
		return err
	}

	return nil
}

// paramCheckCreateTrainJob checks that the boot file has a ".py" suffix and
// that the number of work servers lies between 1 and 25 inclusive.
func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
	if !strings.HasSuffix(form.BootFile, ".py") {
		log.Error("the boot file(%s) must be a python file", form.BootFile)
		return errors.New("启动文件必须是python文件")
	}

	if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 {
		log.Error("the WorkServerNumber(%d) must be in [1,25]", form.WorkServerNumber)
		return errors.New("计算节点数必须在1-25之间")
	}

	return nil
}

// TrainJobShow renders the detail page of a train job: the refreshed remote
// status (which is also stored in the database), the dataset name and the
// content of the first log file.
func TrainJobShow(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	var jobID = ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	attach, err := models.GetAttachmentByUUID(task.Uuid)
	if err != nil {
		log.Error("GetAttachmentByUUID(%s) failed:%v", task.Uuid, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
	if err != nil {
		log.Error("GetJob(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	if result != nil {
		result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05")
		// Format the duration as hh:mm:ss using millisecond divisors.
		if result.Duration != 0 {
			result.TrainJobDuration = addZero(result.Duration/3600000) + ":" + addZero(result.Duration%3600000/60000) + ":" + addZero(result.Duration%60000/1000)
		} else {
			result.TrainJobDuration = "00:00:00"
		}
		result.Status = modelarts.TransTrainJobStatus(result.IntStatus)
		err = models.SetTrainJobStatusByJobID(jobID, result.Status, result.Duration, string(result.TrainJobDuration))
		if err != nil {
			ctx.ServerError("UpdateJob failed", err)
			return
		}

		result.DatasetName = attach.Name
	}

	// trainJobGetLog only succeeds when at least one log file exists, so
	// indexing LogFileList[0] below is safe.
	resultLogFile, resultLog, err := trainJobGetLog(jobID)
	if err != nil {
		log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
	ctx.Data["log"] = resultLog
	ctx.Data["task"] = task
	ctx.Data["jobID"] = jobID
	ctx.Data["result"] = result
	ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
}

// addZero formats t in base 10 and prepends "0" when t is less than 10.
func addZero(t int64) (m string) {
	m = strconv.FormatInt(t, 10)
	if t < 10 {
		m = "0" + m
	}
	return m
}

// TrainJobGetLog loads a section of a train job's log, selected by the
// "file_name", "base_line" and "order" query parameters, into ctx.Data["log"].
// NOTE(review): no response is rendered on success (the ctx.HTML call is
// commented out) — confirm how callers consume the result.
func TrainJobGetLog(ctx *context.Context) {
	ctx.Data["PageIsTrainJob"] = true

	var jobID = ctx.Params(":jobid")
	var logFileName = ctx.Query("file_name")
	var baseLine = ctx.Query("base_line")
	var order = ctx.Query("order")

	if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
		log.Error("order(%s) check failed", order)
		ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
		return
	}

	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
	if err != nil {
		log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	ctx.Data["log"] = result
	//ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
}

// trainJobGetLog returns the log file names of a train job together with the
// content of the first log file in descending order. It returns an error when
// the job has no log file, so on success LogFileList is never empty.
func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		return nil, nil, err
	}

	resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
	if err != nil {
		log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
		return nil, nil, err
	}

	// Guard the LogFileList[0] access below.
	if resultLogFile == nil || len(resultLogFile.LogFileList) == 0 {
		log.Error("GetTrainJobLogFileNames(%s) returned no log file", jobID)
		return nil, nil, errors.New("no log file found for the train job")
	}

	result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
	if err != nil {
		log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
		return nil, nil, err
	}

	return resultLogFile, result, nil
}

// TrainJobDel deletes a train job remotely and locally, then redirects to the
// train-job list.
func TrainJobDel(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		// task must not be dereferenced here: the lookup failed.
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
		return
	}

	_, err = modelarts.DelTrainJob(jobID)
	if err != nil {
		log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
		return
	}

	err = models.DeleteJob(task)
	if err != nil {
		ctx.ServerError("DeleteJob failed", err)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

// TrainJobStop stops the current version of a train job and redirects to the
// train-job list.
func TrainJobStop(ctx *context.Context) {
	var jobID = ctx.Params(":jobid")
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		// task must not be dereferenced here: the lookup failed.
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
		return
	}

	_, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
	if err != nil {
		log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

// canUserCreateTrainJob reports whether the user with the given id is a
// member of the organization named by setting.AllowedOrg.
func canUserCreateTrainJob(uid int64) (bool, error) {
	org, err := models.GetOrgByName(setting.AllowedOrg)
	if err != nil {
		log.Error("get allowed org(%s) failed: %v", setting.AllowedOrg, err)
		return false, err
	}

	return org.IsOrgMember(uid)
}

// TrainJobGetConfigList currently performs the same log lookup as
// TrainJobGetLog and stores the result in ctx.Data["log"].
// NOTE(review): despite its name this handler does not return a config list,
// and it renders no response on success — confirm the intended behavior.
func TrainJobGetConfigList(ctx *context.Context) {
	ctx.Data["PageIsTrainJob"] = true

	var jobID = ctx.Params(":jobid")
	var logFileName = ctx.Query("file_name")
	var baseLine = ctx.Query("base_line")
	var order = ctx.Query("order")

	if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
		log.Error("order(%s) check failed", order)
		ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
		return
	}

	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
	if err != nil {
		log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	ctx.Data["log"] = result
	//ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
}

// getConfigList fetches one page of parameter configs and attaches the
// detailed config to every entry. On failure it returns an empty, non-nil
// result together with the error.
func getConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
	var result models.GetConfigListResult

	list, err := modelarts.GetConfigList(perPage, page, sortBy, order, searchContent, configType)
	if err != nil {
		log.Error("GetConfigList failed: %v", err)
		return &result, err
	}

	// Assign through the index so the detail is stored in the list itself
	// rather than in a copy of the element.
	for i := range list.ParaConfigs {
		paraConfig, err := modelarts.GetParaConfig(list.ParaConfigs[i].ConfigName, configType)
		if err != nil {
			log.Error("GetParaConfig failed: %v", err)
			return &result, err
		}

		list.ParaConfigs[i].Result = paraConfig
	}

	return list, nil
}

// TrainJobShowModels lists the objects stored in OBS under the "parentDir"
// query parameter for the given train job and renders them as a directory
// view.
func TrainJobShowModels(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	jobID := ctx.Params(":jobid")
	parentDir := ctx.Query("parentDir")
	dirArray := strings.Split(parentDir, "/")

	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("no such job(%s): %v, msgID: %v", jobID, err, ctx.Data["msgID"])
		ctx.ServerError("no such job:", err)
		return
	}

	dirs, err := storage.GetObsListObject(task.JobName, parentDir)
	if err != nil {
		log.Info("get TrainJobListModel failed: %v", err)
		ctx.ServerError("GetObsListObject:", err)
		return
	}

	ctx.Data["Path"] = dirArray
	ctx.Data["Dirs"] = dirs
	ctx.Data["task"] = task
	ctx.Data["JobID"] = jobID
	ctx.HTML(200, tplModelArtsTrainJobShowModels)
}

// TrainJobDownloadModel redirects the client to a signed OBS URL for the file
// named by the "jobName", "parentDir" and "fileName" query parameters.
func TrainJobDownloadModel(ctx *context.Context) {
	parentDir := ctx.Query("parentDir")
	fileName := ctx.Query("fileName")
	jobName := ctx.Query("jobName")

	url, err := storage.GetObsCreateSignedUrl(jobName, parentDir, fileName)
	if err != nil {
		log.Error("GetObsCreateSignedUrl failed: %v, msgID: %v", err.Error(), ctx.Data["msgID"])
		ctx.ServerError("GetObsCreateSignedUrl", err)
		return
	}

	// A freshly signed URL is generated per request, so use a temporary
	// redirect: a 301 is cacheable by default and would keep pointing
	// clients at a previously signed URL.
	http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusFound)
}