@@ -453,29 +453,32 @@ type GetImagesPayload struct { | |||
type CloudbrainsOptions struct { | |||
ListOptions | |||
RepoID int64 // include all repos if empty | |||
UserID int64 | |||
JobID string | |||
SortType string | |||
CloudbrainIDs []int64 | |||
JobStatus []string | |||
JobStatusNot bool | |||
Keyword string | |||
Type int | |||
JobTypes []string | |||
VersionName string | |||
IsLatestVersion string | |||
JobTypeNot bool | |||
NeedRepoInfo bool | |||
RepoIDList []int64 | |||
BeginTime time.Time | |||
EndTime time.Time | |||
ComputeResource string | |||
BeginTimeUnix int64 | |||
EndTimeUnix int64 | |||
AiCenter string | |||
NeedDeleteInfo string | |||
Cluster string | |||
RepoID int64 // include all repos if empty | |||
UserID int64 | |||
JobID string | |||
SortType string | |||
CloudbrainIDs []int64 | |||
JobStatus []string | |||
JobStatusNot bool | |||
Keyword string | |||
Type int | |||
JobTypes []string | |||
VersionName string | |||
IsLatestVersion string | |||
JobTypeNot bool | |||
NeedRepoInfo bool | |||
RepoIDList []int64 | |||
BeginTime time.Time | |||
EndTime time.Time | |||
ComputeResource string | |||
BeginTimeUnix int64 | |||
EndTimeUnix int64 | |||
AiCenter string | |||
NeedDeleteInfo string | |||
Cluster string | |||
AccCardType string | |||
AccCardsNum int | |||
WorkServerNumber int | |||
} | |||
type TaskPod struct { | |||
@@ -1563,7 +1566,8 @@ type CreateGrampusJobResponse struct { | |||
type GetGrampusJobResponse struct { | |||
GrampusResult | |||
JobInfo GrampusJobInfo `json:"otJob"` | |||
JobInfo GrampusJobInfo `json:"otJob"` | |||
ExitDiagnostics string `json:"exitDiagnostics"` | |||
} | |||
type GrampusNotebookResponse struct { | |||
@@ -2435,18 +2439,44 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
) | |||
} | |||
// Filter by work server count; a non-positive value disables the filter.
if opts.WorkServerNumber > 0 {
	if opts.WorkServerNumber == 1 {
		// A request for exactly one server also matches rows whose column is
		// 0 or NULL (presumably records that never stored the value — confirm
		// against the schema).
		cond = cond.And(builder.Or(
			builder.Eq{"cloudbrain.work_server_number": 0},
			builder.Eq{"cloudbrain.work_server_number": 1},
			builder.IsNull{"cloudbrain.work_server_number"},
		))
	} else {
		cond = cond.And(
			builder.Eq{"cloudbrain.work_server_number": opts.WorkServerNumber},
		)
	}
}
// Filter by accelerator card type when one was requested.
if opts.AccCardType != "" {
	cond = cond.And(builder.Eq{"cloudbrain_spec.acc_card_type": opts.AccCardType})
}
// Filter by accelerator card count only for a positive value. The zero value
// has to mean "no filter": with ">= 0" every caller that leaves AccCardsNum
// unset would be restricted to rows with acc_cards_num = 0.
// NOTE(review): if filtering for exactly zero cards is required, the option
// needs an explicit sentinel (e.g. -1) set by every caller.
if opts.AccCardsNum > 0 {
	cond = cond.And(builder.Eq{"cloudbrain_spec.acc_cards_num": opts.AccCardsNum})
}
var count int64 | |||
var err error | |||
condition := "cloudbrain.user_id = `user`.id" | |||
if len(opts.Keyword) == 0 { | |||
count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain)) | |||
count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | |||
Join("left", "`user`", condition). | |||
Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). | |||
Count(new(CloudbrainInfo)) | |||
} else { | |||
lowerKeyWord := strings.ToLower(opts.Keyword) | |||
cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, | |||
builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord})) | |||
count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | |||
Join("left", "`user`", condition).Count(new(CloudbrainInfo)) | |||
Join("left", "`user`", condition). | |||
Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). | |||
Count(new(CloudbrainInfo)) | |||
} | |||
@@ -2468,6 +2498,7 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) | |||
if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | |||
Join("left", "`user`", condition). | |||
Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). | |||
Find(&cloudbrains); err != nil { | |||
return nil, 0, fmt.Errorf("Find: %v", err) | |||
} | |||
@@ -198,7 +198,6 @@ sendjob: | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + urlTrainJob + "/" + jobID) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetJob: %v", err) | |||
} | |||
@@ -22,9 +22,9 @@ import ( | |||
const ( | |||
//notebook | |||
storageTypeOBS = "obs" | |||
autoStopDuration = 4 * 60 * 60 | |||
AutoStopDurationMs = 4 * 60 * 60 * 1000 | |||
storageTypeOBS = "obs" | |||
autoStopDuration = 4 * 60 * 60 | |||
AutoStopDurationMs = 4 * 60 * 60 * 1000 | |||
CodePath = "/code/" | |||
OutputPath = "/output/" | |||
@@ -168,7 +168,6 @@ type OrgMultiNode struct { | |||
Node []int `json:"node"` | |||
} | |||
type Parameters struct { | |||
Parameter []struct { | |||
Label string `json:"label"` | |||
@@ -645,7 +645,7 @@ func GetAllCloudbrainsPeriodDistribution(ctx *context.Context) { | |||
} | |||
} | |||
ComputeResourceList := []string{"CPU/GPU", "NPU"} | |||
ComputeResourceList := []string{"CPU/GPU", "NPU", "GCU"} | |||
for _, v := range ComputeResourceList { | |||
if _, ok := cloudBrainComputeResource[v]; !ok { | |||
cloudBrainComputeResource[v] = 0 | |||
@@ -687,7 +687,6 @@ func GetCloudbrainsDetailData(ctx *context.Context) { | |||
return | |||
} | |||
recordBeginTime := recordCloudbrain[0].Cloudbrain.CreatedUnix | |||
endTime := time.Now() | |||
listType := ctx.Query("listType") | |||
jobType := ctx.Query("jobType") | |||
jobStatus := ctx.Query("jobStatus") | |||
@@ -695,6 +694,33 @@ func GetCloudbrainsDetailData(ctx *context.Context) { | |||
aiCenter := ctx.Query("aiCenter") | |||
needDeleteInfo := ctx.Query("needDeleteInfo") | |||
accCardType := ctx.Query("accCardType") | |||
accCardsNum := ctx.QueryInt("accCardsNum") | |||
workServerNumber := ctx.QueryInt("workServerNumber") | |||
// Resolve the queried time range. When either bound is missing, fall back to
// the range from the first recorded task's creation time until now.
beginTimeStr := ctx.QueryTrim("beginTime")
endTimeStr := ctx.QueryTrim("endTime")
var beginTimeUnix int64
var endTimeUnix int64
if beginTimeStr == "" || endTimeStr == "" {
	beginTimeUnix = int64(recordBeginTime)
	endTimeUnix = time.Now().Unix()
} else {
	// Both bounds are parsed in the server's local time zone.
	const queryTimeLayout = "2006-01-02T15:04:05"
	beginTime, err := time.ParseInLocation(queryTimeLayout, beginTimeStr, time.Local)
	if err != nil {
		// The format verb is required; without it the error is not rendered
		// properly in the log line.
		log.Error("Can not ParseInLocation: %v", err)
		ctx.Error(http.StatusBadRequest, ctx.Tr("ParseInLocation_get_error"))
		return
	}
	beginTimeUnix = beginTime.Unix()
	endTime, err := time.ParseInLocation(queryTimeLayout, endTimeStr, time.Local)
	if err != nil {
		log.Error("Can not ParseInLocation: %v", err)
		ctx.Error(http.StatusBadRequest, ctx.Tr("ParseInLocation_get_error"))
		return
	}
	endTimeUnix = endTime.Unix()
}
if cloudBrainType == models.TypeCloudBrainOne && aiCenter == models.AICenterOfCloudBrainOne { | |||
aiCenter = "" | |||
} | |||
@@ -753,18 +779,21 @@ func GetCloudbrainsDetailData(ctx *context.Context) { | |||
Page: page, | |||
PageSize: pageSize, | |||
}, | |||
Keyword: keyword, | |||
Type: cloudBrainType, | |||
ComputeResource: listType, | |||
JobTypeNot: jobTypeNot, | |||
JobStatusNot: jobStatusNot, | |||
JobStatus: jobStatuses, | |||
JobTypes: jobTypes, | |||
NeedRepoInfo: true, | |||
BeginTimeUnix: int64(recordBeginTime), | |||
EndTimeUnix: endTime.Unix(), | |||
AiCenter: aiCenter, | |||
NeedDeleteInfo: needDeleteInfo, | |||
Keyword: keyword, | |||
Type: cloudBrainType, | |||
ComputeResource: listType, | |||
JobTypeNot: jobTypeNot, | |||
JobStatusNot: jobStatusNot, | |||
JobStatus: jobStatuses, | |||
JobTypes: jobTypes, | |||
NeedRepoInfo: true, | |||
BeginTimeUnix: beginTimeUnix, | |||
EndTimeUnix: endTimeUnix, | |||
AiCenter: aiCenter, | |||
NeedDeleteInfo: needDeleteInfo, | |||
AccCardType: accCardType, | |||
AccCardsNum: accCardsNum, | |||
WorkServerNumber: workServerNumber, | |||
}) | |||
if err != nil { | |||
ctx.ServerError("Get job failed:", err) | |||
@@ -1039,7 +1068,7 @@ func getCloudbrainCount(beginTime time.Time, endTime time.Time, cloudbrains []*m | |||
} | |||
} | |||
ComputeResourceList := []string{"CPU/GPU", "NPU"} | |||
ComputeResourceList := []string{"CPU/GPU", "NPU", "GCU"} | |||
for _, v := range ComputeResourceList { | |||
if _, ok := cloudBrainComputeResource[v]; !ok { | |||
cloudBrainComputeResource[v] = 0 | |||
@@ -1359,6 +1359,23 @@ func GrampusGetLog(ctx *context.Context) { | |||
}) | |||
return | |||
} | |||
result, err := grampus.GetJob(jobID) | |||
if err != nil { | |||
log.Error("GetJob(%s) failed:%v", job.JobName, err) | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"JobName": job.JobName, | |||
"Content": content, | |||
"CanLogDownload": false, | |||
}) | |||
return | |||
} | |||
if result != nil { | |||
job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||
if job.Status == models.GrampusStatusFailed { | |||
content = content + "\n" + result.ExitDiagnostics | |||
} | |||
} | |||
canLogDownload := err == nil && job.IsUserHasRight(ctx.User) | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"JobName": job.JobName, | |||