diff --git a/models/cloudbrain.go b/models/cloudbrain.go index e0df17753..f84d994bc 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1470,7 +1470,7 @@ type GetNotebookListResult struct { NotebookList []NotebookList `json:"data"` } -//Grampus +// Grampus type GrampusResult struct { ErrorCode int `json:"errorCode"` ErrorMsg string `json:"errorMsg"` @@ -2200,8 +2200,10 @@ func GetGPUStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) Find(&cloudbrains) } -/** - 本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间 +/* +* + + 本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间 */ func GetGPUStoppedDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) @@ -2664,7 +2666,7 @@ type DatasetInfo struct { Name string FullName string Type int - Size int + Size int64 } func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { @@ -2727,7 +2729,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn Name: fileName, FullName: attach.Name, Type: attach.Type, - Size: int(attach.Size), + Size: attach.Size, } if i == 0 { datasetNames = attach.Name diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 5fc02c609..ecd2598b7 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -3322,6 +3322,7 @@ Stopped_failed=Fail to stop the job, please try again later. Stopped_success_update_status_fail=Succeed in stopping th job, but failed to update the job status and duration time. load_code_failed=Fail to load code, please check if the right branch is selected. +error.debug_datasetsize = The size of dataset exceeds limitation (%dGB) error.dataset_select = dataset select error:the count exceed the limit or has same name error.partial_datasets_not_available = There are non-existent or deleted files in the selected dataset file, please select again new_train_gpu_tooltips = The code is storaged in %s, the dataset is storaged in %s, the pre-trained model is storaged in the run parameter %s, and please put your model into %s then you can download it online diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 4dc2e4c89..5cef94087 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -3343,7 +3343,7 @@ Stopped_failed=任务停止失败,请稍后再试。 Stopped_success_update_status_fail=任务停止成功,状态及运行时间更新失败。 load_code_failed=代码加载失败,请确认选择了正确的分支。 -error.debug_datasetsize = 数据集大小超过限制('%d'GB) +error.debug_datasetsize = 数据集大小超过限制(%dGB) error.dataset_select = 数据集选择错误:数量超过限制或者有同名数据集 error.partial_datasets_not_available = 选择的数据集文件中有不存在或已删除的文件,请重新选择 new_train_gpu_tooltips = 训练脚本存储在 %s 中,数据集存储在 %s 中,预训练模型存放在运行参数 %s 中,训练输出请存储在 %s 中以供后续下载。 diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index cf6df6312..6d25511cc 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -285,7 +285,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } var datasetInfos map[string]models.DatasetInfo var datasetNames string - var attachSize int + var attachSize int64 if uuids != "" { datasetInfos, datasetNames, err = models.GetDatasetInfo(uuids) if err != nil { @@ -299,10 +299,10 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int(setting.DebugAttachSize*1000*1000*1000) { - log.Error("The DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) // GB + if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB cloudBrainNewDataPrepare(ctx, jobType) - ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tpl, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form) return } } @@ -603,8 +603,10 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra } -/** - 检查用户传输的参数是否符合专属资源池 +/* +* + + 检查用户传输的参数是否符合专属资源池 */ func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string { if cloudbrain.SpecialPools != nil { @@ -746,7 +748,6 @@ func CloudBrainRestart(ctx *context.Context) { }) } - func hasDatasetDeleted(task *models.Cloudbrain) bool { if task.Uuid == "" { return false @@ -1321,8 +1322,8 @@ func DeleteJobsByRepoID(repoID int64) { DeleteJobs(cloudBrains) } -/** - +/* +* */ func StopJobs(cloudBrains []*models.Cloudbrain) { diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 05f857d86..74e2f06d6 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -202,6 +202,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook var datasetInfos map[string]models.DatasetInfo var datasetNames string + var attachSize int64 //var if uuid != "" { datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid, computeSourceSimple) @@ -217,6 +218,15 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook ctx.RenderWithErr(ctx.Tr("cloudbrain.error.partial_datasets_not_available"), tpl, &form) return } + for _, infos := range datasetInfos { + attachSize += infos.Size + } + if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form) + return + } } //prepare code and out path @@ -1377,9 +1387,9 @@ func GrampusGetLog(ctx *context.Context) { return } - content, err := grampus.GetTrainJobLog(job.JobID) + result, err := grampus.GetJob(jobID) if err != nil { - log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) + log.Error("GetJob(%s) failed:%v", job.JobName, err) ctx.JSON(http.StatusOK, map[string]interface{}{ "JobName": job.JobName, "Content": "", @@ -1387,20 +1397,26 @@ func GrampusGetLog(ctx *context.Context) { }) return } - result, err := grampus.GetJob(jobID) + exitDiagnostics := "" + if result != nil { + exitDiagnostics = result.ExitDiagnostics + } + + content, err := grampus.GetTrainJobLog(job.JobID) if err != nil { - log.Error("GetJob(%s) failed:%v", job.JobName, err) + log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) ctx.JSON(http.StatusOK, map[string]interface{}{ "JobName": job.JobName, - "Content": content, + "Content": exitDiagnostics, "CanLogDownload": false, }) return } + if result != nil { job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) if job.Status == models.GrampusStatusFailed { - content = content + "\n" + result.ExitDiagnostics + content = content + "\n" + exitDiagnostics } } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index fe109422e..6fec40651 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -220,16 +220,16 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm } var datasetInfos map[string]models.DatasetInfo - var attachSize int + var attachSize int64 if uuid != "" { datasetInfos, _, err = models.GetDatasetInfo(uuid) for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int(setting.DebugAttachSize*1000*1000*1000) { - log.Error("The DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) //GB + if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) //GB notebookNewDataPrepare(ctx) - ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tplModelArtsNotebookNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tplModelArtsNotebookNew, &form) return } } diff --git a/templates/repo/grampus/notebook/gcu/new.tmpl b/templates/repo/grampus/notebook/gcu/new.tmpl index eed6c7a29..8698447fb 100644 --- a/templates/repo/grampus/notebook/gcu/new.tmpl +++ b/templates/repo/grampus/notebook/gcu/new.tmpl @@ -3,7 +3,7 @@