diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 6e8ee1505..ceb552811 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1110,7 +1110,8 @@ func UpdateJob(job *Cloudbrain) error { func updateJob(e Engine, job *Cloudbrain) error { var sess *xorm.Session sess = e.Where("job_id = ?", job.JobID) - _, err := sess.Cols("status", "container_id", "container_ip").Update(job) + //_, err := sess.Cols("status", "container_id", "container_ip").Update(job) + _, err := sess.Update(job) return err } diff --git a/modules/cron/tasks_basic.go b/modules/cron/tasks_basic.go index 294690d45..b9838e66f 100755 --- a/modules/cron/tasks_basic.go +++ b/modules/cron/tasks_basic.go @@ -134,7 +134,7 @@ func registerHandleBlockChainUnSuccessRepos() { RegisterTaskFatal("handle_blockchain_unsuccess_repos", &BaseConfig{ Enabled: true, RunAtStart: true, - Schedule: "@every 1m", + Schedule: "@every 10m", }, func(ctx context.Context, _ *models.User, _ Config) error { repo.HandleBlockChainUnSuccessRepos() return nil @@ -145,7 +145,7 @@ func registerHandleBlockChainMergedPulls() { RegisterTaskFatal("handle_blockchain_merged_pull", &BaseConfig{ Enabled: true, RunAtStart: true, - Schedule: "@every 1m", + Schedule: "@every 10m", }, func(ctx context.Context, _ *models.User, _ Config) error { repo.HandleBlockChainMergedPulls() return nil @@ -156,7 +156,7 @@ func registerHandleBlockChainUnSuccessCommits() { RegisterTaskFatal("handle_blockchain_unsuccess_commits", &BaseConfig{ Enabled: true, RunAtStart: true, - Schedule: "@every 3m", + Schedule: "@every 10m", }, func(ctx context.Context, _ *models.User, _ Config) error { repo.HandleBlockChainUnSuccessCommits() return nil diff --git a/modules/setting/setting.go b/modules/setting/setting.go index f7b78235b..dd51623c1 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -448,23 +448,24 @@ var ( GpuTypes string DebugServerHost string ResourceSpecs string + MaxDuration int64 //benchmark config IsBenchmarkEnabled bool - BenchmarkOwner string + BenchmarkOwner string BenchmarkName string BenchmarkServerHost string BenchmarkCategory string //snn4imagenet config IsSnn4imagenetEnabled bool - Snn4imagenetOwner string + Snn4imagenetOwner string Snn4imagenetName string Snn4imagenetServerHost string //snn4imagenet config IsBrainScoreEnabled bool - BrainScoreOwner string + BrainScoreOwner string BrainScoreName string BrainScoreServerHost string @@ -1238,6 +1239,7 @@ func NewContext() { JobType = sec.Key("GPU_TYPE_DEFAULT").MustString("openidebug") GpuTypes = sec.Key("GPU_TYPES").MustString("") ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("") + MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) sec = Cfg.Section("benchmark") IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index be5eab52d..04af5dba4 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -822,10 +822,24 @@ func SyncCloudbrainStatus() { taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) task.Status = taskRes.TaskStatuses[0].State if task.Status != string(models.JobWaiting) { + task.Duration = time.Now().Unix() - taskRes.TaskStatuses[0].StartAt.Unix() err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) - continue + } + + if task.Duration >= setting.MaxDuration { + log.Info("begin to stop job(%s), because of the duration", task.JobName) + err = cloudbrain.StopJob(task.JobID) + if err != nil { + log.Error("StopJob(%s) failed:%v", task.JobName, err) + continue + } + task.Status = string(models.JobStopped) + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + } } } }