From 8745aa211c1b9d3f114d869930af091435d3441b Mon Sep 17 00:00:00 2001 From: liuzx Date: Thu, 18 Aug 2022 11:24:17 +0800 Subject: [PATCH 1/6] check delete task --- routers/api/v1/repo/modelarts.go | 11 ++++++++++- routers/repo/modelarts.go | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 419c6d6a4..79e35812e 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -6,13 +6,14 @@ package repo import ( - "code.gitea.io/gitea/modules/notification" "encoding/json" "net/http" "path" "strconv" "strings" + "code.gitea.io/gitea/modules/notification" + "code.gitea.io/gitea/modules/grampus" "code.gitea.io/gitea/modules/setting" @@ -352,6 +353,14 @@ func DelTrainJobVersion(ctx *context.APIContext) { return } + if task.Status != string(models.ModelArtsTrainJobImageFailed) && task.Status != string(models.ModelArtsTrainJobSubmitFailed) && task.Status != string(models.ModelArtsTrainJobDeleteFailed) && + task.Status != string(models.ModelArtsTrainJobCompleted) && task.Status != string(models.ModelArtsTrainJobFailed) && + task.Status != string(models.ModelArtsTrainJobKilled) && task.Status != string(models.ModelArtsTrainJobCanceled) && task.Status != string(models.ModelArtsTrainJobLost) { + log.Error("the job(%s) version has not been stopped", task.JobName) + ctx.NotFound(err) + return + } + //删除modelarts上的记录 _, err = modelarts.DelTrainJobVersion(jobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 763308930..237591635 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1839,6 +1839,16 @@ func TrainJobDel(ctx *context.Context) { return } + for _, task := range VersionListTasks { + if task.Status != string(models.ModelArtsTrainJobImageFailed) && task.Status != string(models.ModelArtsTrainJobSubmitFailed) && task.Status != string(models.ModelArtsTrainJobDeleteFailed) && + task.Status != string(models.ModelArtsTrainJobCompleted) && task.Status != string(models.ModelArtsTrainJobFailed) && + task.Status != string(models.ModelArtsTrainJobKilled) && task.Status != string(models.ModelArtsTrainJobCanceled) && task.Status != string(models.ModelArtsTrainJobLost) { + log.Error("the job(%s) version has not been stopped", task.JobName) + ctx.RenderWithErr("the job version has not been stopped", tplModelArtsTrainJobIndex, nil) + return + } + } + //删除modelarts上的任务记录 _, err = modelarts.DelTrainJob(jobID) if err != nil { From 58f7fbf04c38fefc0e6a17855d702879e77d0c57 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 23 Aug 2022 10:38:11 +0800 Subject: [PATCH 2/6] fix issue --- options/locale/locale_en-US.ini | 3 ++- options/locale/locale_zh-CN.ini | 3 ++- templates/custom/wait_count.tmpl | 4 ++-- templates/custom/wait_count_train.tmpl | 4 ++-- templates/repo/modelarts/inferencejob/new.tmpl | 2 +- templates/repo/modelarts/trainjob/new.tmpl | 2 +- templates/repo/modelarts/trainjob/version_new.tmpl | 2 +- 7 files changed, 11 insertions(+), 9 deletions(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 90d0348bb..5c8016326 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -3119,7 +3119,8 @@ wrong_specification=You cannot use this specification, please choose another ite job_name_rule = Please enter letters, numbers, _ and - up to 64 characters and cannot end with a dash (-). -dataset_path_rule = The dataset location is stored in the environment variable data_url, and the output path is stored in the environment variable train_url. +train_dataset_path_rule = The dataset location is stored in the environment variable data_url, and the output path is stored in the environment variable train_url. +infer_dataset_path_rule = The dataset location is stored in the environment variable data_url, and the output path is stored in the environment variable train_url. view_sample = View sample inference_output_path_rule = The inference output path is stored in the environment variable result_url. model_file_path_rule=The model file location is stored in the environment variable ckpt_url diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 4c38aefcc..80c5d347e 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -3134,7 +3134,8 @@ card_type = 卡类型 wrong_specification=您目前不能使用这个资源规格,请选择其他资源规格。 job_name_rule = 请输入字母、数字、_和-,最长64个字符,且不能以中划线(-)结尾。 -dataset_path_rule = 数据集位置存储在环境变量data_url中,输出路径存储在环境变量train_url中。 +train_dataset_path_rule = 数据集位置存储在环境变量data_url中,训练输出路径存储在环境变量train_url中。 +infer_dataset_path_rule = 数据集位置存储在环境变量data_url中,推理输出路径存储在环境变量train_url中。 view_sample = 查看样例 inference_output_path_rule = 推理输出路径存储在环境变量result_url中。 model_file_path_rule = 模型文件位置存储在环境变量ckpt_url中。 diff --git a/templates/custom/wait_count.tmpl b/templates/custom/wait_count.tmpl index bef8f1327..072d1d40d 100644 --- a/templates/custom/wait_count.tmpl +++ b/templates/custom/wait_count.tmpl @@ -13,9 +13,9 @@ {{ end }} - {{.i18n.Tr "repo.wait_count_start"}} {{if .QueuesDetail}} {{ $gpuQueue }} diff --git a/templates/custom/wait_count_train.tmpl b/templates/custom/wait_count_train.tmpl index fcfadc5be..4b7e2dac3 100644 --- a/templates/custom/wait_count_train.tmpl +++ b/templates/custom/wait_count_train.tmpl @@ -14,9 +14,9 @@ {{ end }} - {{.ctx.i18n.Tr "repo.wait_count_start"}} {{if .type}} {{ $gpuQueue }} diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl index fa82ad5d8..cb848f102 100644 --- a/templates/repo/modelarts/inferencejob/new.tmpl +++ b/templates/repo/modelarts/inferencejob/new.tmpl @@ -86,7 +86,7 @@ {{template "custom/wait_count_train" Dict "ctx" $}}
- {{.i18n.Tr "cloudbrain.dataset_path_rule" | Safe}} + {{.i18n.Tr "cloudbrain.infer_dataset_path_rule" | Safe}}
diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index cf3ea76c4..f3c03584b 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -113,7 +113,7 @@ {{template "custom/wait_count_train" Dict "ctx" $}}
- {{.i18n.Tr "cloudbrain.dataset_path_rule" | Safe}} + {{.i18n.Tr "cloudbrain.train_dataset_path_rule" | Safe}}
diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index cf32409ed..f0bab8e82 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ -171,7 +171,7 @@
- {{.i18n.Tr "cloudbrain.dataset_path_rule"}} + {{.i18n.Tr "cloudbrain.train_dataset_path_rule"}}
{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} From 744d89fca5623439a2ec2bc06130483f2dbe6e3d Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 23 Aug 2022 11:41:48 +0800 Subject: [PATCH 3/6] fix issue --- templates/repo/modelarts/trainjob/show.tmpl | 38 ++++++----------------------- web_src/js/features/cloudbrainShow.js | 29 ++++++++++++++++++++++ web_src/js/features/cloudrbanin.js | 13 ++++++++++ 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/templates/repo/modelarts/trainjob/show.tmpl b/templates/repo/modelarts/trainjob/show.tmpl index e5873d540..239763de1 100755 --- a/templates/repo/modelarts/trainjob/show.tmpl +++ b/templates/repo/modelarts/trainjob/show.tmpl @@ -274,10 +274,14 @@ {{if .CanDel}} - {{$.i18n.Tr "repo.delete"}} {{else}} - {{$.i18n.Tr "repo.delete"}} {{end}}
@@ -896,35 +900,7 @@ $('#name').val(modelName) $('#version').val("0.0.1") } - function deleteVersion(version_name) { - stopBubbling(arguments.callee.caller.arguments[0]) - let flag = 1; - $('.ui.basic.modal').modal({ - onDeny: function () { - flag = false - }, - onApprove: function () { - $.post(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/del_version`, { version_name: version_name }, (data) => { - if (data.VersionListCount === 0) { - location.href = `/${userName}/${repoPath}/modelarts/train-job` - } else { - $('#accordion' + version_name).remove() - } - - }).fail(function (err) { - console.log(err); - }); - flag = true - }, - onHidden: function () { - if (flag == false) { - $('.alert').html('您已取消操作').removeClass('alert-success').addClass('alert-danger').show().delay(1500).fadeOut(); - } - } - }) - .modal('show') - - } + diff --git a/web_src/js/features/cloudbrainShow.js b/web_src/js/features/cloudbrainShow.js index 691ac57c4..9c68722a7 100644 --- a/web_src/js/features/cloudbrainShow.js +++ b/web_src/js/features/cloudbrainShow.js @@ -213,6 +213,35 @@ export default async function initCloudrainSow() { }); e.stopPropagation(); }); + $(".delete-show-version").click(function (e) { + const ID = this.dataset.jobid; + const repoPath = this.dataset.repopath; + const version_name = this.dataset.version; + const url = `/api/v1/repos/${repoPath}/${ID}/del_version`; + $(".ui.basic.modal") + .modal({ + onApprove: function () { + $.post(url, { version_name: version_name }, (data) => { + console.log(data); + if (data.StatusOK === 0) { + if (data.VersionListCount === 0) { + location.href = `/${repoPath}`; + } else { + $("#accordion" + version_name).remove(); + } + refreshStatusShow(version_name, ID, repoPath); + } else { + return; + } + }).fail(function (err) { + console.log(err); + }); + }, + }) + .modal("show"); + + e.stopPropagation(); + }); function refreshStatusShow(version_name, ID, repoPath) { $.get( `/api/v1/repos/${repoPath}/${ID}?version_name=${version_name}`, diff --git a/web_src/js/features/cloudrbanin.js b/web_src/js/features/cloudrbanin.js index 445f70be3..2df780233 100644 --- a/web_src/js/features/cloudrbanin.js +++ b/web_src/js/features/cloudrbanin.js @@ -182,6 +182,15 @@ export default async function initCloudrain() { "CREATE_FAILED", "STOPPED", ]; + let deleteArray = [ + "KILLED", + "FAILED", + "START_FAILED", + "COMPLETED", + "SUCCEEDED", + "CREATE_FAILED", + "STOPPED", + ]; $.get( `/api/v1/repos/${repoPath}/${jobID}?version_name=${versionname}`, (data) => { @@ -194,6 +203,10 @@ export default async function initCloudrain() { if (stopArray.includes(data.JobStatus)) { $("#" + versionname + "-stop").addClass("disabled"); } + if (deleteArray.includes(data.JobStatus)) { + $(`#${versionname}-delete`).removeClass("disabled"); + $(`#${versionname}-delete`).addClass("blue"); + } if (data.JobStatus === "COMPLETED") { $("#" + versionname + "-create-model") .removeClass("disabled") From 7931fb64174868055010cc7604391466ec4271ab Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 23 Aug 2022 11:48:34 +0800 Subject: [PATCH 4/6] fix issue --- templates/repo/modelarts/trainjob/version_new.tmpl | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index f0bab8e82..e5d4d954a 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ -73,6 +73,11 @@ + {{template "custom/wait_count_train" Dict "ctx" $}} +
+ + {{.i18n.Tr "cloudbrain.train_dataset_path_rule" | Safe}} +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

@@ -152,26 +157,9 @@ {{.i18n.Tr "cloudbrain.view_sample"}}
- -
- {{.i18n.Tr "cloudbrain.train_dataset_path_rule"}}
{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} From a32dfa568b0b5030db885672e4c2db8beae40fcd Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Tue, 23 Aug 2022 11:54:53 +0800 Subject: [PATCH 5/6] fix invalid token --- go.sum | 2 ++ modules/cloudbrain/resty.go | 46 +++++++++++++++++++++---------------------- modules/setting/cloudbrain.go | 2 ++ 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/go.sum b/go.sum index 6735a1938..e0c11f261 100755 --- a/go.sum +++ b/go.sum @@ -713,12 +713,14 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1 github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v1.0.1 h1:voD4ITNjPL5jjBfgR/r8fPIIBrliWrWHeiJApdr3r4w= github.com/smartystreets/assertions v1.0.1/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM= +github.com/smartystreets/assertions v1.1.0 h1:MkTeG1DMwsrdH7QtLXy5W+fUxWq+vmb6cLmyJ7aRtF0= github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c/go.mod h1:XDJAKZRPZ1CvBcN2aX5YOUTYGHki24fSF0Iv48Ibg0s= github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337 h1:WN9BUFbdyOsSH/XohnWpXOlq9NBD5sGAB2FciQMUEe8= github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE= diff --git a/modules/cloudbrain/resty.go b/modules/cloudbrain/resty.go index a7a19ed10..d9db3bbb5 100755 --- a/modules/cloudbrain/resty.go +++ b/modules/cloudbrain/resty.go @@ -24,10 +24,11 @@ var ( ) const ( - JobHasBeenStopped = "S410" - Public = "public" - Custom = "custom" - LogPageSize = 500 + JobHasBeenStopped = "S410" + errInvalidToken = "S401" + Public = "public" + Custom = "custom" + LogPageSize = 500 LogPageTokenExpired = "5m" pageSize = 15 QueuesDetailUrl = "/rest-server/api/v2/queuesdetail" @@ -59,7 +60,7 @@ func loginCloudbrain() error { res, err := client.R(). SetHeader("Content-Type", "application/json"). - SetBody(map[string]interface{}{"username": username, "password": password, "expiration": "604800"}). + SetBody(map[string]interface{}{"username": username, "password": password, "expiration": conf.Expiration}). SetResult(&loginResult). Post(HOST + "/rest-server/api/v1/token") if err != nil { @@ -122,16 +123,12 @@ sendjob: Post(HOST + "/rest-server/api/v1/jobs/") if err != nil { - if res != nil { - var response models.CloudBrainResult - json.Unmarshal(res.Body(), &response) - log.Error("code(%s), msg(%s)", response.Code, response.Msg) - return nil, fmt.Errorf(response.Msg) - } return nil, fmt.Errorf("resty create job: %s", err) } - if jobResult.Code == "S401" && retry < 1 { + var response models.CloudBrainResult + json.Unmarshal(res.Body(), &response) + if response.Code == errInvalidToken && retry < 1 { retry++ _ = loginCloudbrain() goto sendjob @@ -163,7 +160,9 @@ sendjob: return nil, fmt.Errorf("resty GetJob: %v", err) } - if getJobResult.Code == "S401" && retry < 1 { + var response models.CloudBrainResult + json.Unmarshal(res.Body(), &response) + if response.Code == errInvalidToken && retry < 1 { retry++ _ = loginCloudbrain() goto sendjob @@ -196,13 +195,8 @@ sendjob: } var response models.CloudBrainResult - err = json.Unmarshal(res.Body(), &response) - if err != nil { - log.Error("json.Unmarshal failed: %s", err.Error()) - return &getImagesResult, fmt.Errorf("json.Unmarshal failed: %s", err.Error()) - } - - if response.Code == "S401" && retry < 1 { + json.Unmarshal(res.Body(), &response) + if response.Code == errInvalidToken && retry < 1 { retry++ _ = loginCloudbrain() goto sendjob @@ -290,7 +284,9 @@ sendjob: return fmt.Errorf("resty CommitImage: %v", err) } - if result.Code == "S401" && retry < 1 { + var response models.CloudBrainResult + json.Unmarshal(res.Body(), &response) + if response.Code == errInvalidToken && retry < 1 { retry++ _ = loginCloudbrain() goto sendjob @@ -442,7 +438,9 @@ sendjob: return fmt.Errorf("resty StopJob: %v", err) } - if result.Code == "S401" && retry < 1 { + var response models.CloudBrainResult + json.Unmarshal(res.Body(), &response) + if response.Code == errInvalidToken && retry < 1 { retry++ _ = loginCloudbrain() goto sendjob @@ -498,7 +496,7 @@ func GetJobAllLog(scrollID string) (*models.GetJobLogResult, error) { client := getRestyClient() var result models.GetJobLogResult req := models.GetAllJobLogParams{ - Scroll: LogPageTokenExpired, + Scroll: LogPageTokenExpired, ScrollID: scrollID, } @@ -522,7 +520,7 @@ func GetJobAllLog(scrollID string) (*models.GetJobLogResult, error) { return &result, nil } -func DeleteJobLogToken(scrollID string) (error) { +func DeleteJobLogToken(scrollID string) error { checkSetting() client := getRestyClient() var result models.DeleteJobLogTokenResult diff --git a/modules/setting/cloudbrain.go b/modules/setting/cloudbrain.go index 2d80eea25..c06676243 100755 --- a/modules/setting/cloudbrain.go +++ b/modules/setting/cloudbrain.go @@ -5,6 +5,7 @@ type CloudbrainLoginConfig struct { Password string Host string ImageURLPrefix string + Expiration string } var ( @@ -17,5 +18,6 @@ func GetCloudbrainConfig() CloudbrainLoginConfig { Cloudbrain.Password = cloudbrainSec.Key("PASSWORD").MustString("") Cloudbrain.Host = cloudbrainSec.Key("REST_SERVER_HOST").MustString("") Cloudbrain.ImageURLPrefix = cloudbrainSec.Key("IMAGE_URL_PREFIX").MustString("") + Cloudbrain.Expiration = cloudbrainSec.Key("EXPIRATION").MustString("604800") return Cloudbrain } From 27d41546acb8c5d015e523c8f750d96b401ecb9c Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 23 Aug 2022 12:06:30 +0800 Subject: [PATCH 6/6] fix issue --- web_src/js/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_src/js/index.js b/web_src/js/index.js index 1ec155461..738a31c7d 100755 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -3520,7 +3520,7 @@ function initVueComponents() { `${self.reposFilter}:${self.archivedFilter}:${self.privateFilter}`, count ); - self.finalPage = Math.floor(count / self.searchLimit) + 1; + self.finalPage = Math.ceil(count / self.searchLimit); self.updateHistory(); } }).always(() => {