diff --git a/models/cloudbrain.go b/models/cloudbrain.go index b58383e6e..2459e6b7a 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -617,7 +617,8 @@ type GetTrainJobResult struct { JobName string `json:"job_name"` JobID int64 `json:"job_id"` Description string `json:"job_desc"` - Status int `json:"status"` + IntStatus int `json:"status"` + Status string LongCreateTime int64 `json:"create_time"` CreateTime string Duration int64 `json:"duration"` //训练作业的运行时间,单位为毫秒 @@ -666,6 +667,12 @@ type GetTrainJobLogFileNamesResult struct { LogFileList []string `json:"log_file_list"` } +type TrainJobResult struct { + ErrorCode string `json:"error_code"` + ErrorMsg string `json:"error_msg"` + IsSuccess bool `json:"is_success"` +} + func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) { sess := x.NewSession() defer sess.Close() diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 37c3972e1..fda8d46c8 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -164,7 +164,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { } err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: transTrainJobStatus(jobResult.Status), + Status: TransTrainJobStatus(jobResult.Status), UserID: ctx.User.ID, RepoID: ctx.Repo.Repository.ID, JobID: strconv.FormatInt(jobResult.JobID, 10), @@ -183,7 +183,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { return nil } -func transTrainJobStatus(status int) string{ +func TransTrainJobStatus(status int) string{ switch status { case 0: return "UNKNOWN" diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go index d3a639ab0..da0ddf3eb 100755 --- a/modules/modelarts/resty.go +++ b/modules/modelarts/resty.go @@ -213,7 +213,7 @@ sendjob: return &result, nil } -func DelJob(jobID string) (*models.NotebookDelResult, error) { +func DelNotebook(jobID string) (*models.NotebookDelResult, error) { checkSetting() client := getRestyClient() var result models.NotebookDelResult @@ -477,10 +477,9 @@ func GetTrainJob(jobID, versionID string) (*models.GetTrainJobResult, error) { sendjob: res, err := client.R(). - SetHeader("Content-Type", "application/json"). SetAuthToken(TOKEN). SetResult(&result). - Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/" + versionID) + Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID) if err != nil { return nil, fmt.Errorf("resty GetTrainJob: %v", err) @@ -597,3 +596,44 @@ sendjob: return &result, nil } + +func DelTrainJob(jobID string) (*models.TrainJobResult, error) { + checkSetting() + client := getRestyClient() + var result models.TrainJobResult + + retry := 0 + +sendjob: + res, err := client.R(). + SetAuthToken(TOKEN). + SetResult(&result). + Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID) + + if err != nil { + return &result, fmt.Errorf("resty DelTrainJob: %v", err) + } + + if res.StatusCode() == http.StatusUnauthorized && retry < 1 { + retry++ + _ = getToken() + goto sendjob + } + + if res.StatusCode() != http.StatusOK { + var temp models.ErrorResult + if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { + log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) + return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) + } + log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + return &result, fmt.Errorf("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + } + + if !result.IsSuccess { + log.Error("DelTrainJob(%s) failed", jobID) + return &result, fmt.Errorf("删除训练作业失败:%s", result.ErrorMsg) + } + + return &result, nil +} diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 3bd73d931..644bcb52b 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -239,7 +239,7 @@ func NotebookDel(ctx *context.Context) { return } - _, err = modelarts.DelJob(jobID) + _, err = modelarts.DelNotebook(jobID) if err != nil { log.Error("DelJob(%s) failed:%v", task.JobName, err.Error()) ctx.ServerError("DelJob failed", err) @@ -545,8 +545,8 @@ func TrainJobShow(ctx *context.Context) { } if result != nil { - createTime, _ := com.StrTo(result.LongCreateTime).Int64() - result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05") + result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05") + result.Status = modelarts.TransTrainJobStatus(result.IntStatus) } ctx.Data["task"] = task @@ -583,3 +583,27 @@ func TrainJobGetLog(ctx *context.Context) { ctx.Data["log"] = result ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) } + +func TrainJobDel(ctx *context.Context) { + var jobID = ctx.Params(":jobid") + task, err := models.GetCloudbrainByJobID(jobID) + if err != nil { + ctx.ServerError("GetCloudbrainByJobID failed", err) + return + } + + _, err = modelarts.DelTrainJob(jobID) + if err != nil { + log.Error("TrainJobDel(%s) failed:%v", task.JobName, err.Error()) + ctx.ServerError("TrainJobDel failed", err) + return + } + + err = models.DeleteJob(task) + if err != nil { + ctx.ServerError("TrainJobDel failed", err) + return + } + + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") +} diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 7669b6908..960030bb6 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -935,7 +935,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("", reqRepoCloudBrainReader, repo.TrainJobShow) m.Get("/debug", reqRepoCloudBrainReader, repo.NotebookDebug) m.Post("/stop", reqRepoCloudBrainWriter, repo.NotebookStop) - m.Post("/del", reqRepoCloudBrainWriter, repo.NotebookDel) + m.Post("/del", reqRepoCloudBrainWriter, repo.TrainJobDel) m.Get("/log", reqRepoCloudBrainReader, repo.TrainJobGetLog) }) m.Get("/create", reqRepoCloudBrainWriter, repo.TrainJobNew) diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index ce91fb50d..9c208df1a 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -1,5 +1,10 @@ {{template "base/head" .}}