From d689709ac33e4307124d86d2bb2606eccb5a549f Mon Sep 17 00:00:00 2001 From: liuzx Date: Tue, 2 Nov 2021 15:54:20 +0800 Subject: [PATCH 01/16] fix bug --- models/cloudbrain.go | 1 + modules/modelarts/modelarts.go | 7 +++++++ routers/repo/modelarts.go | 27 ++++++++++++++------------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index be14c633e..563ab9d06 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -70,6 +70,7 @@ type Cloudbrain struct { VersionID int64 `xorm:"INDEX DEFAULT 0"` VersionName string Uuid string + DatasetName string User *User `xorm:"-"` Repo *Repository `xorm:"-"` diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index a0d068b56..5717d044f 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -195,6 +195,12 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { return err } + attach, err := models.GetAttachmentByUUID(req.Uuid) + if err != nil { + log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) + return nil + } + err = models.CreateCloudbrain(&models.Cloudbrain{ Status: TransTrainJobStatus(jobResult.Status), UserID: ctx.User.ID, @@ -206,6 +212,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { VersionID: jobResult.VersionID, VersionName: jobResult.VersionName, Uuid: req.Uuid, + DatasetName: attach.Name, }) if err != nil { diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index b37af704d..885a0e66b 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -35,10 +35,10 @@ const ( tplModelArtsNew base.TplName = "repo/modelarts/new" tplModelArtsShow base.TplName = "repo/modelarts/show" - tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index" - tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new" - tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show" - tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index" + tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index" + tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new" + tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show" + tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index" ) // MustEnableDataset check if repository enable internal cb @@ -644,9 +644,10 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { - log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err) + log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) - ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) + // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) + ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form) return } @@ -860,12 +861,12 @@ func TrainJobShow(ctx *context.Context) { return } - attach, err := models.GetAttachmentByUUID(task.Uuid) - if err != nil { - log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) - return - } + // attach, err := models.GetAttachmentByUUID(task.Uuid) + // if err != nil { + // log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error()) + // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) + // return + // } result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { @@ -889,7 +890,7 @@ func TrainJobShow(ctx *context.Context) { return } - result.DatasetName = attach.Name + result.DatasetName = task.DatasetName } resultLogFile, resultLog, err := trainJobGetLog(jobID) From f3f0c4b920d32bbb8e58ed169aff36a30bc751c4 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 2 Nov 2021 15:56:37 +0800 Subject: [PATCH 02/16] fix issue --- templates/repo/modelarts/trainjob/index.tmpl | 4 ++-- templates/repo/modelarts/trainjob/new.tmpl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index 4337d6de8..7bc14d6a4 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -355,7 +355,7 @@
{{$.CsrfTokenHtml}} {{if $.Permission.CanWrite $.UnitTypeCloudBrain}} - + {{$.i18n.Tr "repo.stop"}} {{else}} @@ -535,7 +535,7 @@ $('#model-delete-'+jobID).removeClass('red') $('#model-delete-'+jobID).addClass('disabled') } - if(status=="KILLED" || status=="FAILED"){ + if(status=="KILLED" || status=="FAILED" || status=="KILLING"){ $('#stop-model-debug-'+jobID).removeClass('blue') $('#stop-model-debug-'+jobID).addClass('disabled') $('#model-delete-'+jobID).removeClass('disabled') diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index 6e98158d9..edce9954f 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -368,10 +368,10 @@ function Add_parameter(i){ value = '
' + '
' + - ' ' + + ' ' + '
' + '
' + - '' + + '' + '
'+ '' + '' + From e83fb94cb50280380e4259f68bb9ec292121e5d7 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 2 Nov 2021 16:58:01 +0800 Subject: [PATCH 03/16] fix issue --- options/locale/locale_en-US.ini | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 7d1d7a3e0..7fa8c7a4f 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -764,15 +764,15 @@ submit_image=Submit Image download=Download -cloudbrain=cloudbrain +cloudbrain=Cloudbrain cloudbrain.new=New cloudbrain -cloudbrain.desc=cloudbrain +cloudbrain.desc=Cloudbrain cloudbrain.cancel=Cancel -cloudbrain.commit_image = submit -clone_cnt=download -balance = balance -balance.total_view = total balance -balance.available = available balance: +cloudbrain.commit_image = Submit +clone_cnt=Download +balance = Balance +balance.total_view = Total Balance +balance.available = Available Balance: cloudbrain1 = cloudbrain1 cloudbrain2 = cloudbrain2 cloudbrain_selection = select cloudbrain @@ -787,7 +787,7 @@ cloudbrain_status_runtime = Running Time modelarts.notebook=Debug Task -modelarts.train_job=Create Task +modelarts.train_job=Train Task modelarts.train_job.new_debug= New Debug Task modelarts.train_job.new_train=New Train Task modelarts.train_job.config=Configuration information From af8705038356f16a819755a3ee15b4a1b9b43812 Mon Sep 17 00:00:00 2001 From: liuzx Date: Tue, 2 Nov 2021 17:20:00 +0800 Subject: [PATCH 04/16] change data_url --- routers/repo/modelarts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 885a0e66b..ce316a0be 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -621,7 +621,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath - dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" + dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" //can, err := canUserCreateTrainJob(ctx.User.ID) //if err != nil { From 7e4f3501c5945e26eae87db7e472d089446f4878 Mon Sep 17 00:00:00 2001 From: liuzx Date: Tue, 2 Nov 2021 18:09:15 +0800 Subject: [PATCH 05/16] fix bug 677 --- routers/repo/modelarts.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index ce316a0be..4152ddf53 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -646,6 +646,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) + ctx.Data["bootFile"] = form.BootFile + ctx.Data["uuid"] = form.Attachment + ctx.Data["params"] = form.Params // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form) return @@ -757,6 +760,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) trainJobNewDataPrepare(ctx) + ctx.Data["bootFile"] = form.BootFile + ctx.Data["uuid"] = form.Attachment + ctx.Data["params"] = form.Params ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return } From 478ac98fd3d106d17681d83d87ef2bfefc345989 Mon Sep 17 00:00:00 2001 From: liuzx Date: Tue, 2 Nov 2021 18:53:00 +0800 Subject: [PATCH 06/16] fix bug 677 --- routers/repo/modelarts.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 4152ddf53..856826bd7 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -646,11 +646,18 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) + attach, err := models.GetAttachmentByUUID(uuid) + if err != nil { + log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) + return + } ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment + ctx.Data["datasetName"] = attach.Name ctx.Data["params"] = form.Params // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form) + // ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form) return } @@ -760,8 +767,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) trainJobNewDataPrepare(ctx) + attach, err := models.GetAttachmentByUUID(uuid) + if err != nil { + log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) + return + } ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment + ctx.Data["datasetName"] = attach.Name ctx.Data["params"] = form.Params ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return From b12e19adbe367f6a5124fa46971f00e141c02151 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 2 Nov 2021 18:56:44 +0800 Subject: [PATCH 07/16] fix issue --- templates/repo/modelarts/trainjob/new.tmpl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index edce9954f..794facaf0 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -235,7 +235,11 @@
-->
- + {{if .bootFile}} + + {{else}} + + {{end}} @@ -244,7 +248,11 @@
From b388c22818ebad61e528df20dd9a276693616984 Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Tue, 2 Nov 2021 19:57:01 +0800 Subject: [PATCH 08/16] fix 680 677 --- routers/repo/modelarts.go | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 856826bd7..ea8ab0c06 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -4,6 +4,7 @@ import ( "encoding/json" "errors" "io" + "io/ioutil" "net/http" "os" "path" @@ -643,18 +644,26 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } + attach, err := models.GetAttachmentByUUID(uuid) + if err != nil { + log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) + return + } + + //todo: del the codeLocalPath + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) - attach, err := models.GetAttachmentByUUID(uuid) - if err != nil { - log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) - return - } + ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment ctx.Data["datasetName"] = attach.Name ctx.Data["params"] = form.Params + trainJobNewDataPrepare(ctx) // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form) // ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form) @@ -763,15 +772,10 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) Parameters: param, } - err := modelarts.GenerateTrainJob(ctx, req) + err = modelarts.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) trainJobNewDataPrepare(ctx) - attach, err := models.GetAttachmentByUUID(uuid) - if err != nil { - log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) - return - } ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment ctx.Data["datasetName"] = attach.Name From ca25780c9c91a4d12763a64b643ae80dc62d6790 Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Tue, 2 Nov 2021 20:06:10 +0800 Subject: [PATCH 09/16] mod length limit --- templates/repo/modelarts/trainjob/new.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index 794facaf0..8df38495e 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -165,7 +165,7 @@ -->
- +
- +
+
@@ -182,7 +178,7 @@
- {{range .engines}} {{end}} @@ -190,7 +186,7 @@
- {{range .engine_versions}} {{end}} @@ -246,16 +242,17 @@
- {{if $.uuid}} {{end}} {{range .attachments}} + {{end}}
- +
@@ -270,7 +267,7 @@