Browse Source

Merge pull request 'fix-2452 GPU调试任务启动命令修改' (#2463) from fix-2452 into V20220718

Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/2463
Reviewed-by: lewis <747342561@qq.com>
fix-2452
lewis 2 years ago
parent
commit
d57b790596
3 changed files with 14 additions and 5 deletions
  1. +7
    -2
      modules/cloudbrain/cloudbrain.go
  2. +4
    -0
      modules/setting/setting.go
  3. +3
    -3
      routers/repo/cloudbrain.go

+ 7
- 2
modules/cloudbrain/cloudbrain.go View File

@@ -17,7 +17,7 @@ import (
) )


const ( const (
Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` //CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
CodeMountPath = "/code" CodeMountPath = "/code"
@@ -71,6 +71,11 @@ type GenerateCloudBrainTaskReq struct {
ResourceSpecId int ResourceSpecId int
} }


func GetCloudbrainDebugCommand() string {
var command = `pip3 install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;/usr/local/bin/python /usr/local/bin/jupyter-lab --ServerApp.shutdown_no_activity_timeout=` + setting.CullIdleTimeout + ` --TerminalManager.cull_inactive_timeout=` + setting.CullIdleTimeout + ` --TerminalManager.cull_interval=` + setting.CullInterval + ` --MappingKernelManager.cull_idle_timeout=` + setting.CullIdleTimeout + ` --MappingKernelManager.cull_interval=` + setting.CullInterval + ` --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --ServerApp.token="" --ServerApp.allow_origin="self https://cloudbrain.pcl.ac.cn" `
return command
}

func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
if !ctx.IsSigned { if !ctx.IsSigned {
return false return false
@@ -507,7 +512,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e
GPUNumber: resourceSpec.GpuNum, GPUNumber: resourceSpec.GpuNum,
MemoryMB: resourceSpec.MemMiB, MemoryMB: resourceSpec.MemMiB,
ShmMB: resourceSpec.ShareMemMiB, ShmMB: resourceSpec.ShareMemMiB,
Command: Command,
Command: GetCloudbrainDebugCommand(),//Command,
NeedIBDevice: false, NeedIBDevice: false,
IsMainRole: false, IsMainRole: false,
UseNNI: false, UseNNI: false,


+ 4
- 0
modules/setting/setting.go View File

@@ -467,6 +467,8 @@ var (
TrainGpuTypes string TrainGpuTypes string
TrainResourceSpecs string TrainResourceSpecs string
MaxDatasetNum int MaxDatasetNum int
CullIdleTimeout string
CullInterval string


//benchmark config //benchmark config
IsBenchmarkEnabled bool IsBenchmarkEnabled bool
@@ -1315,6 +1317,8 @@ func NewContext() {
SpecialPools = sec.Key("SPECIAL_POOL").MustString("") SpecialPools = sec.Key("SPECIAL_POOL").MustString("")
MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5) MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5)
CullIdleTimeout = sec.Key("CULL_IDLE_TIMEOUT").MustString("900")
CullInterval = sec.Key("CULL_INTERVAL").MustString("60")


sec = Cfg.Section("benchmark") sec = Cfg.Section("benchmark")
IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false)


+ 3
- 3
routers/repo/cloudbrain.go View File

@@ -136,7 +136,7 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
} }


ctx.Data["attachments"] = attachs ctx.Data["attachments"] = attachs
ctx.Data["command"] = cloudbrain.Command
ctx.Data["command"] = cloudbrain.GetCloudbrainDebugCommand()
ctx.Data["code_path"] = cloudbrain.CodeMountPath ctx.Data["code_path"] = cloudbrain.CodeMountPath
ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath
ctx.Data["model_path"] = cloudbrain.ModelMountPath ctx.Data["model_path"] = cloudbrain.ModelMountPath
@@ -315,7 +315,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
return return
} }


command := cloudbrain.Command
command := cloudbrain.GetCloudbrainDebugCommand()
if jobType == string(models.JobTypeTrain) { if jobType == string(models.JobTypeTrain) {
tpl = tplCloudBrainTrainJobNew tpl = tplCloudBrainTrainJobNew
commandTrain, err := getTrainJobCommand(form) commandTrain, err := getTrainJobCommand(form)
@@ -2186,7 +2186,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
repo := ctx.Repo.Repository repo := ctx.Repo.Repository


tpl := tplCloudBrainBenchmarkNew tpl := tplCloudBrainBenchmarkNew
command := cloudbrain.Command
command := cloudbrain.GetCloudbrainDebugCommand()


tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
if err == nil { if err == nil {


Loading…
Cancel
Save