From 9714d66f86fbf1cefb46d2e587118d334b9d5e80 Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Wed, 8 Dec 2021 14:26:36 +0800 Subject: [PATCH 1/3] fix 988 --- models/cloudbrain.go | 3 ++- modules/cron/tasks_basic.go | 6 +++--- modules/setting/setting.go | 8 +++++--- routers/repo/cloudbrain.go | 16 +++++++++++++++- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 6e8ee1505..ceb552811 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1110,7 +1110,8 @@ func UpdateJob(job *Cloudbrain) error { func updateJob(e Engine, job *Cloudbrain) error { var sess *xorm.Session sess = e.Where("job_id = ?", job.JobID) - _, err := sess.Cols("status", "container_id", "container_ip").Update(job) + //_, err := sess.Cols("status", "container_id", "container_ip").Update(job) + _, err := sess.Update(job) return err } diff --git a/modules/cron/tasks_basic.go b/modules/cron/tasks_basic.go index 294690d45..b9838e66f 100755 --- a/modules/cron/tasks_basic.go +++ b/modules/cron/tasks_basic.go @@ -134,7 +134,7 @@ func registerHandleBlockChainUnSuccessRepos() { RegisterTaskFatal("handle_blockchain_unsuccess_repos", &BaseConfig{ Enabled: true, RunAtStart: true, - Schedule: "@every 1m", + Schedule: "@every 10m", }, func(ctx context.Context, _ *models.User, _ Config) error { repo.HandleBlockChainUnSuccessRepos() return nil @@ -145,7 +145,7 @@ func registerHandleBlockChainMergedPulls() { RegisterTaskFatal("handle_blockchain_merged_pull", &BaseConfig{ Enabled: true, RunAtStart: true, - Schedule: "@every 1m", + Schedule: "@every 10m", }, func(ctx context.Context, _ *models.User, _ Config) error { repo.HandleBlockChainMergedPulls() return nil @@ -156,7 +156,7 @@ func registerHandleBlockChainUnSuccessCommits() { RegisterTaskFatal("handle_blockchain_unsuccess_commits", &BaseConfig{ Enabled: true, RunAtStart: true, - Schedule: "@every 3m", + Schedule: "@every 10m", }, func(ctx context.Context, _ *models.User, _ Config) error { repo.HandleBlockChainUnSuccessCommits() return nil diff --git a/modules/setting/setting.go b/modules/setting/setting.go index f7b78235b..dd51623c1 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -448,23 +448,24 @@ var ( GpuTypes string DebugServerHost string ResourceSpecs string + MaxDuration int64 //benchmark config IsBenchmarkEnabled bool - BenchmarkOwner string + BenchmarkOwner string BenchmarkName string BenchmarkServerHost string BenchmarkCategory string //snn4imagenet config IsSnn4imagenetEnabled bool - Snn4imagenetOwner string + Snn4imagenetOwner string Snn4imagenetName string Snn4imagenetServerHost string //snn4imagenet config IsBrainScoreEnabled bool - BrainScoreOwner string + BrainScoreOwner string BrainScoreName string BrainScoreServerHost string @@ -1238,6 +1239,7 @@ func NewContext() { JobType = sec.Key("GPU_TYPE_DEFAULT").MustString("openidebug") GpuTypes = sec.Key("GPU_TYPES").MustString("") ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("") + MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) sec = Cfg.Section("benchmark") IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index be5eab52d..04af5dba4 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -822,10 +822,24 @@ func SyncCloudbrainStatus() { taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) task.Status = taskRes.TaskStatuses[0].State if task.Status != string(models.JobWaiting) { + task.Duration = time.Now().Unix() - taskRes.TaskStatuses[0].StartAt.Unix() err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) - continue + } + + if task.Duration >= setting.MaxDuration { + log.Info("begin to stop job(%s), because of the duration", task.JobName) + err = cloudbrain.StopJob(task.JobID) + if err != nil { + log.Error("StopJob(%s) failed:%v", task.JobName, err) + continue + } + task.Status = string(models.JobStopped) + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + } } } } From 00169d2d6b1cf894f55e73830dec16176417aa15 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 8 Dec 2021 14:59:03 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=90=8E=E7=AB=AFcandebug=E4=B8=8D?= =?UTF-8?q?=E5=86=8D=E5=85=B3=E6=B3=A8=E4=BB=BB=E5=8A=A1=E7=8A=B6=E6=80=81?= =?UTF-8?q?=EF=BC=8C=E5=89=8D=E7=AB=AF=E6=A0=B9=E6=8D=AE=E7=8A=B6=E6=80=81?= =?UTF-8?q?=E5=8F=98=E5=8C=96=E5=88=B7=E6=96=B0=E6=8C=89=E9=92=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/cloudbrain.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index ae7163d77..8dcb405a2 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -71,14 +71,8 @@ func CloudBrainIndex(ctx *context.Context) { return } - timestamp := time.Now().Unix() for i, task := range ciTasks { - if task.Status == string(models.JobRunning) && (timestamp-int64(task.Cloudbrain.CreatedUnix) > 10) { - ciTasks[i].CanDebug = cloudbrain.CanCreateOrDebugJob(ctx) - } else { - ciTasks[i].CanDebug = false - } - + ciTasks[i].CanDebug = cloudbrain.CanCreateOrDebugJob(ctx) ciTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) } From a355fc941594f96819322bf4547f38eee0ab2b0e Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 8 Dec 2021 15:01:08 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/modelarts.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 483efbc20..3994ba542 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -69,11 +69,8 @@ func NotebookIndex(ctx *context.Context) { } for i, task := range ciTasks { - if task.Status == string(models.JobRunning) { - ciTasks[i].CanDebug = cloudbrain.CanCreateOrDebugJob(ctx) - } else { - ciTasks[i].CanDebug = false - } + + ciTasks[i].CanDebug = cloudbrain.CanCreateOrDebugJob(ctx) ciTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) }