
Merge branch 'V20221228' into zouap

pull/3544/head · zouap, 2 years ago · parent commit 0d3e1ae883
7 changed files with 46 additions and 26 deletions
1. models/cloudbrain.go (+7 -5)
2. options/locale/locale_en-US.ini (+1 -0)
3. options/locale/locale_zh-CN.ini (+1 -1)
4. routers/repo/cloudbrain.go (+10 -9)
5. routers/repo/grampus.go (+22 -6)
6. routers/repo/modelarts.go (+4 -4)
7. templates/repo/grampus/notebook/gcu/new.tmpl (+1 -1)

models/cloudbrain.go (+7 -5)

@@ -1470,7 +1470,7 @@ type GetNotebookListResult struct {
 	NotebookList []NotebookList `json:"data"`
 }
 
-//Grampus
+// Grampus
 type GrampusResult struct {
 	ErrorCode int    `json:"errorCode"`
 	ErrorMsg  string `json:"errorMsg"`
@@ -2200,8 +2200,10 @@ func GetGPUStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error)
 		Find(&cloudbrains)
 }
 
-/**
-本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间
+/*
+*
+
+本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间
 */
 func GetGPUStoppedDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) {
 	cloudbrains := make([]*Cloudbrain, 0, 10)
@@ -2664,7 +2666,7 @@ type DatasetInfo struct {
 	Name     string
 	FullName string
 	Type     int
-	Size     int
+	Size     int64
 }
 
 func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) {
@@ -2727,7 +2729,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn
 			Name:     fileName,
 			FullName: attach.Name,
 			Type:     attach.Type,
-			Size:     int(attach.Size),
+			Size:     attach.Size,
 		}
 		if i == 0 {
 			datasetNames = attach.Name
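The Size change from int to int64 (and dropping the int(attach.Size) truncation above) is not cosmetic: Go's int is platform-dependent, so on a 32-bit build a sum of multi-GB attachment sizes can silently wrap negative and slip past any limit check. A minimal sketch of that failure mode, with invented attachment sizes; int32 stands in for int on a 32-bit platform:

package main

import "fmt"

func main() {
	// Two hypothetical attachments of ~1.5 GB each (values invented for illustration).
	sizes := []int64{1_610_612_736, 1_610_612_736}

	var sum32 int32 // models int on a 32-bit build
	var sum64 int64
	for _, s := range sizes {
		sum32 += int32(s) // wraps silently once the running total passes math.MaxInt32
		sum64 += s
	}
	fmt.Println(sum32) // -1073741824: a negative total passes any "> limit" check
	fmt.Println(sum64) // 3221225472: the correct byte count
}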


options/locale/locale_en-US.ini (+1 -0)

@@ -3322,6 +3322,7 @@ Stopped_failed=Fail to stop the job, please try again later.
 Stopped_success_update_status_fail=Succeed in stopping th job, but failed to update the job status and duration time.
 load_code_failed=Fail to load code, please check if the right branch is selected.
 
+error.debug_datasetsize = The size of dataset exceeds limitation (%dGB)
 error.dataset_select = dataset select error:the count exceed the limit or has same name
 error.partial_datasets_not_available = There are non-existent or deleted files in the selected dataset file, please select again
 new_train_gpu_tooltips = The code is storaged in <strong style="color:#010101">%s</strong>, the dataset is storaged in <strong style="color:#010101">%s</strong>, the pre-trained model is storaged in the run parameter <strong style="color:#010101">%s</strong>, and please put your model into <strong style="color:#010101">%s</strong> then you can download it online


options/locale/locale_zh-CN.ini (+1 -1)

@@ -3343,7 +3343,7 @@ Stopped_failed=任务停止失败,请稍后再试。
 Stopped_success_update_status_fail=任务停止成功,状态及运行时间更新失败。
 load_code_failed=代码加载失败,请确认选择了正确的分支。
 
-error.debug_datasetsize = 数据集大小超过限制('%d'GB)
+error.debug_datasetsize = 数据集大小超过限制(%dGB)
 error.dataset_select = 数据集选择错误:数量超过限制或者有同名数据集
 error.partial_datasets_not_available = 选择的数据集文件中有不存在或已删除的文件,请重新选择
 new_train_gpu_tooltips = 训练脚本存储在 <strong style="color:#010101">%s</strong> 中,数据集存储在 <strong style="color:#010101">%s</strong> 中,预训练模型存放在运行参数 <strong style="color:#010101">%s</strong> 中,训练输出请存储在 <strong style="color:#010101">%s</strong> 中以供后续下载。
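These two locale edits pair with the router changes below: the message now expects the limit in gigabytes, so callers must pass setting.DebugAttachSize itself rather than the byte product, and the stray quotes around %d in the zh-CN string are dropped. A small sketch of the unit mismatch being fixed; the 8 GB value is an assumed example, not the project's actual setting:

package main

import "fmt"

func main() {
	// Assumed example value for setting.DebugAttachSize.
	var debugAttachSizeGB int64 = 8

	msg := "The size of dataset exceeds limitation (%dGB)\n"

	// Before this commit: the byte product was passed into a message that appends "GB".
	fmt.Printf(msg, debugAttachSizeGB*1000*1000*1000) // ... (8000000000GB)

	// After: the limit is passed in gigabytes, matching the unit in the text.
	fmt.Printf(msg, debugAttachSizeGB) // ... (8GB)
}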


routers/repo/cloudbrain.go (+10 -9)

@@ -285,7 +285,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
 	}
 	var datasetInfos map[string]models.DatasetInfo
 	var datasetNames string
-	var attachSize int
+	var attachSize int64
 	if uuids != "" {
 		datasetInfos, datasetNames, err = models.GetDatasetInfo(uuids)
 		if err != nil {
@@ -299,10 +299,10 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
 		for _, infos := range datasetInfos {
 			attachSize += infos.Size
 		}
-		if attachSize > int(setting.DebugAttachSize*1000*1000*1000) {
-			log.Error("The DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) // GB
+		if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) {
+			log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB
 			cloudBrainNewDataPrepare(ctx, jobType)
-			ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tpl, &form)
+			ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form)
 			return
 		}
 	}
@@ -603,8 +603,10 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
 
 }
 
-/**
-检查用户传输的参数是否符合专属资源池
+/*
+*
+
+检查用户传输的参数是否符合专属资源池
 */
 func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string {
 	if cloudbrain.SpecialPools != nil {
@@ -746,7 +748,6 @@ func CloudBrainRestart(ctx *context.Context) {
 	})
 }
 
-
 func hasDatasetDeleted(task *models.Cloudbrain) bool {
 	if task.Uuid == "" {
 		return false
@@ -1321,8 +1322,8 @@ func DeleteJobsByRepoID(repoID int64) {
 	DeleteJobs(cloudBrains)
 }
 
-/**
+/*
+*
 */
 func StopJobs(cloudBrains []*models.Cloudbrain) {
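The hunks above, and their twins in grampus.go and modelarts.go below, converge on one guard: sum the selected attachments and reject the request once they exceed setting.DebugAttachSize gigabytes, with the arithmetic done in int64. A standalone sketch of that guard; checkDebugAttachSize and the trimmed DatasetInfo are hypothetical stand-ins for the inline code in cloudBrainCreate and models.DatasetInfo:

package main

import "fmt"

// DatasetInfo mirrors the relevant part of models.DatasetInfo after this
// commit: Size is int64, in bytes.
type DatasetInfo struct {
	Name string
	Size int64
}

// checkDebugAttachSize sums the attachment sizes and compares against the
// limit, using decimal gigabytes (1 GB = 1000*1000*1000 bytes) as the commit does.
func checkDebugAttachSize(infos map[string]DatasetInfo, limitGB int) error {
	var attachSize int64
	for _, info := range infos {
		attachSize += info.Size
	}
	if attachSize > int64(limitGB)*1000*1000*1000 {
		return fmt.Errorf("the size of dataset exceeds limitation (%dGB)", limitGB)
	}
	return nil
}

func main() {
	infos := map[string]DatasetInfo{
		"uuid-1": {Name: "train.zip", Size: 9_500_000_000}, // ~9.5 GB, invented
	}
	if err := checkDebugAttachSize(infos, 8); err != nil {
		fmt.Println(err) // the size of dataset exceeds limitation (8GB)
	}
}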




routers/repo/grampus.go (+22 -6)

@@ -202,6 +202,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook
 
 	var datasetInfos map[string]models.DatasetInfo
 	var datasetNames string
+	var attachSize int64
 	//var
 	if uuid != "" {
 		datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid, computeSourceSimple)
@@ -217,6 +218,15 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook
 			ctx.RenderWithErr(ctx.Tr("cloudbrain.error.partial_datasets_not_available"), tpl, &form)
 			return
 		}
+		for _, infos := range datasetInfos {
+			attachSize += infos.Size
+		}
+		if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) {
+			log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB
+			grampusNotebookNewDataPrepare(ctx, processType)
+			ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form)
+			return
+		}
 	}
 
 	//prepare code and out path
@@ -1377,9 +1387,9 @@ func GrampusGetLog(ctx *context.Context) {
 		return
 	}
 
-	content, err := grampus.GetTrainJobLog(job.JobID)
+	result, err := grampus.GetJob(jobID)
 	if err != nil {
-		log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"])
+		log.Error("GetJob(%s) failed:%v", job.JobName, err)
 		ctx.JSON(http.StatusOK, map[string]interface{}{
 			"JobName": job.JobName,
 			"Content": "",
@@ -1387,20 +1397,26 @@ func GrampusGetLog(ctx *context.Context) {
 		})
 		return
 	}
-	result, err := grampus.GetJob(jobID)
+	exitDiagnostics := ""
+	if result != nil {
+		exitDiagnostics = result.ExitDiagnostics
+	}
+
+	content, err := grampus.GetTrainJobLog(job.JobID)
 	if err != nil {
-		log.Error("GetJob(%s) failed:%v", job.JobName, err)
+		log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"])
 		ctx.JSON(http.StatusOK, map[string]interface{}{
 			"JobName":        job.JobName,
-			"Content":        content,
+			"Content":        exitDiagnostics,
 			"CanLogDownload": false,
 		})
 		return
 	}
 
 	if result != nil {
 		job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
 		if job.Status == models.GrampusStatusFailed {
-			content = content + "\n" + result.ExitDiagnostics
+			content = content + "\n" + exitDiagnostics
 		}
 	}
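Beyond adding the same size guard to GrampusNotebookCreate, the substantive change in GrampusGetLog is the reordering: the job record is fetched first, so ExitDiagnostics is already in hand if the log fetch then fails, and the diagnostics are returned as the log content instead of an empty string. A hedged sketch of the new control flow; getJob, getTrainJobLog, and jobResult are simplified stand-ins for the grampus client calls, and the diagnostics text is invented:

package main

import (
	"errors"
	"fmt"
)

// jobResult stands in for the grampus job response; only the field used here.
type jobResult struct {
	ExitDiagnostics string
}

func getJob(jobID string) (*jobResult, error) {
	return &jobResult{ExitDiagnostics: "task failed: exit code 137"}, nil
}

func getTrainJobLog(jobID string) (string, error) {
	return "", errors.New("log service unavailable") // simulate a failed log fetch
}

// fetchLog mirrors the post-commit ordering: job status and diagnostics first,
// log second, so a failed log fetch can still surface something useful.
func fetchLog(jobID string) string {
	result, err := getJob(jobID)
	if err != nil {
		return "" // GetJob failing still yields an empty log, as before
	}
	exitDiagnostics := ""
	if result != nil {
		exitDiagnostics = result.ExitDiagnostics
	}

	content, err := getTrainJobLog(jobID)
	if err != nil {
		// The behavioral change: fall back to the diagnostics rather than "".
		return exitDiagnostics
	}
	return content
}

func main() {
	fmt.Println(fetchLog("job-123")) // task failed: exit code 137
}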




routers/repo/modelarts.go (+4 -4)

@@ -220,16 +220,16 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
 	}
 
 	var datasetInfos map[string]models.DatasetInfo
-	var attachSize int
+	var attachSize int64
 	if uuid != "" {
 		datasetInfos, _, err = models.GetDatasetInfo(uuid)
 		for _, infos := range datasetInfos {
 			attachSize += infos.Size
 		}
-		if attachSize > int(setting.DebugAttachSize*1000*1000*1000) {
-			log.Error("The DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) //GB
+		if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) {
+			log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) //GB
 			notebookNewDataPrepare(ctx)
-			ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tplModelArtsNotebookNew, &form)
+			ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tplModelArtsNotebookNew, &form)
 			return
 		}
 	}
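One subtlety the three routers share: the new comparison converts after multiplying, int64(setting.DebugAttachSize*1000*1000*1000). If DebugAttachSize is a plain int (an assumption; its type is not shown in this diff), the product is still computed in int and could itself wrap on a 32-bit build; converting before multiplying keeps the arithmetic in int64 everywhere. A sketch of the difference, again modeling a 32-bit int with int32:

package main

import "fmt"

func main() {
	var limitGB int32 = 8 // models an int-typed setting on a 32-bit build

	// Convert after multiplying: the product wraps in int32 first.
	afterMul := int64(limitGB * 1000 * 1000 * 1000)
	// Convert before multiplying: the arithmetic happens in int64.
	beforeMul := int64(limitGB) * 1000 * 1000 * 1000

	fmt.Println(afterMul)  // -589934592: wrapped before the conversion
	fmt.Println(beforeMul) // 8000000000: the intended limit in bytes
}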


templates/repo/grampus/notebook/gcu/new.tmpl (+1 -1)

@@ -3,7 +3,7 @@
 <div class="repository">
 	{{template "repo/header" .}}
 	<div class="ui container">
-		<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true"></div>
+		<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}" data-exceed-size="{{DebugAttachSize}}"></div>
 	{{if eq .NotStopTaskCount 0}}
 		{{template "base/alert" .}}
 	{{end}}

