diff --git a/models/cloudbrain.go b/models/cloudbrain.go
index bc09b2c57..b58383e6e 100755
--- a/models/cloudbrain.go
+++ b/models/cloudbrain.go
@@ -649,6 +649,27 @@ type GetTrainJobResult struct {
 	NasShareAddr string `json:"nas_share_addr"`
 }
 
+// GetTrainJobLogResult is the decoded response of the train job "aom-log"
+// request issued by modelarts.GetTrainJobLog.
+type GetTrainJobLogResult struct {
+	ErrorCode string `json:"error_code"`
+	ErrorMsg  string `json:"error_msg"`
+	IsSuccess bool   `json:"is_success"`
+	Content   string `json:"content"`
+	Lines     int    `json:"lines"`
+	StartLine string `json:"start_line"`
+	EndLine   string `json:"end_line"`
+}
+
+// GetTrainJobLogFileNamesResult is the decoded response of the train job
+// "log/file-names" request issued by modelarts.GetTrainJobLogFileNames.
+type GetTrainJobLogFileNamesResult struct {
+	ErrorCode   string   `json:"error_code"`
+	ErrorMsg    string   `json:"error_msg"`
+	IsSuccess   bool     `json:"is_success"`
+	LogFileList []string `json:"log_file_list"`
+}
+
 func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) {
 	sess := x.NewSession()
 	defer sess.Close()
diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go
index b865ec7ac..37c3972e1 100755
--- a/modules/modelarts/modelarts.go
+++ b/modules/modelarts/modelarts.go
@@ -43,6 +43,8 @@ const (
 	OutputPath = "/output/"
 	LogPath = "/log/"
 	JobPath = "/job/"
+	OrderDesc = "desc"
+	OrderAsc = "asc"
 )
 
 type GenerateTrainJobReq struct {
diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go
index 110b01b96..d3a639ab0 100755
--- a/modules/modelarts/resty.go
+++ b/modules/modelarts/resty.go
@@ -1,15 +1,15 @@
 package modelarts
 
 import (
+	"code.gitea.io/gitea/models"
 	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/setting"
 	"crypto/tls"
 	"encoding/json"
 	"fmt"
-	"net/http"
-
-	"code.gitea.io/gitea/models"
-	"code.gitea.io/gitea/modules/setting"
 	"github.com/go-resty/resty/v2"
+	"net/http"
+	"strconv"
 )
 
 var (
@@ -509,3 +509,102 @@ sendjob:
 
 	return &result, nil
 }
+
+// GetTrainJobLog queries the ModelArts "aom-log" endpoint for the log of the
+// given train job version. baseLine, logFile, order and lines are forwarded as
+// the base_line, log_file, order and lines query parameters. When the server
+// answers 401, getToken is called once and the request is retried. A non-nil
+// error is returned for transport failures, non-200 responses and responses
+// whose is_success flag is false.
+func GetTrainJobLog(jobID, versionID, baseLine, logFile, order string, lines int) (*models.GetTrainJobLogResult, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GetTrainJobLogResult
+
+	retry := 0
+
+sendjob:
+	res, err := client.R().
+		SetQueryParams(map[string]string{
+			"base_line": baseLine,
+			"lines":     strconv.Itoa(lines),
+			"log_file":  logFile,
+			"order":     order,
+		}).
+		SetAuthToken(TOKEN).
+		SetResult(&result).
+		Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/aom-log")
+
+	if err != nil {
+		return nil, fmt.Errorf("resty GetTrainJobLog: %v", err)
+	}
+
+	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+		retry++
+		_ = getToken()
+		goto sendjob
+	}
+
+	if res.StatusCode() != http.StatusOK {
+		var temp models.ErrorResult
+		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+		}
+		log.Error("GetTrainJobLog failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+		return &result, fmt.Errorf("GetTrainJobLog failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+	}
+
+	if !result.IsSuccess {
+		log.Error("GetTrainJobLog(%s) failed", jobID)
+		return &result, fmt.Errorf("获取作业日志失败:%s", result.ErrorMsg)
+	}
+
+	return &result, nil
+}
+
+// GetTrainJobLogFileNames queries the ModelArts "log/file-names" endpoint for
+// the log files of the given train job version. When the server answers 401,
+// getToken is called once and the request is retried. A non-nil error is
+// returned for transport failures, non-200 responses and responses whose
+// is_success flag is false.
+func GetTrainJobLogFileNames(jobID, versionID string) (*models.GetTrainJobLogFileNamesResult, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GetTrainJobLogFileNamesResult
+
+	retry := 0
+
+sendjob:
+	res, err := client.R().
+		SetAuthToken(TOKEN).
+		SetResult(&result).
+		Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/log/file-names")
+
+	if err != nil {
+		return nil, fmt.Errorf("resty GetTrainJobLogFileNames: %v", err)
+	}
+
+	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+		retry++
+		_ = getToken()
+		goto sendjob
+	}
+
+	if res.StatusCode() != http.StatusOK {
+		var temp models.ErrorResult
+		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+		}
+		log.Error("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+		return &result, fmt.Errorf("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+	}
+
+	if !result.IsSuccess {
+		log.Error("GetTrainJobLogFileNames(%s) failed", jobID)
+		return &result, fmt.Errorf("获取作业日志失败:%s", result.ErrorMsg)
+	}
+
+	return &result, nil
+}
diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go
index cb0ea8d4d..3bd73d931 100755
--- a/routers/repo/modelarts.go
+++ b/routers/repo/modelarts.go
@@ -554,3 +554,44 @@ func TrainJobShow(ctx *context.Context) {
 	ctx.Data["result"] = result
 	ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
 }
+
+// TrainJobGetLog looks up the train job named by the ":jobid" route parameter,
+// requests 20 lines (descending order) of its first listed log file from
+// ModelArts and renders them on the train job page as ctx.Data["log"].
+func TrainJobGetLog(ctx *context.Context) {
+	ctx.Data["PageIsCloudBrain"] = true
+
+	var jobID = ctx.Params(":jobid")
+	task, err := models.GetCloudbrainByJobID(jobID)
+	if err != nil {
+		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
+		return
+	}
+	versionID := strconv.FormatInt(task.VersionID, 10)
+
+	resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, versionID)
+	if err != nil {
+		log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
+		return
+	}
+
+	// The file list may be empty; indexing it below would then panic, so
+	// report the situation on the page instead.
+	if len(resultLogFile.LogFileList) == 0 {
+		log.Error("GetTrainJobLogFileNames(%s) returned no log file", jobID)
+		ctx.RenderWithErr("no log file found for the train job", tplModelArtsTrainJobShow, nil)
+		return
+	}
+
+	result, err := modelarts.GetTrainJobLog(jobID, versionID, "", resultLogFile.LogFileList[0], modelarts.OrderDesc, 20)
+	if err != nil {
+		log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
+		return
+	}
+
+	ctx.Data["log"] = result
+	ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
+}
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index 2e311a780..7669b6908 100755
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -936,6 +936,7 @@ func RegisterRoutes(m *macaron.Macaron) {
 					m.Get("/debug", reqRepoCloudBrainReader, repo.NotebookDebug)
 					m.Post("/stop", reqRepoCloudBrainWriter, repo.NotebookStop)
 					m.Post("/del", reqRepoCloudBrainWriter, repo.NotebookDel)
+					m.Get("/log", reqRepoCloudBrainReader, repo.TrainJobGetLog)
 				})
 				m.Get("/create", reqRepoCloudBrainWriter, repo.TrainJobNew)
 				m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate)