@@ -453,29 +453,32 @@ type GetImagesPayload struct { | |||||
type CloudbrainsOptions struct { | type CloudbrainsOptions struct { | ||||
ListOptions | ListOptions | ||||
RepoID int64 // include all repos if empty | |||||
UserID int64 | |||||
JobID string | |||||
SortType string | |||||
CloudbrainIDs []int64 | |||||
JobStatus []string | |||||
JobStatusNot bool | |||||
Keyword string | |||||
Type int | |||||
JobTypes []string | |||||
VersionName string | |||||
IsLatestVersion string | |||||
JobTypeNot bool | |||||
NeedRepoInfo bool | |||||
RepoIDList []int64 | |||||
BeginTime time.Time | |||||
EndTime time.Time | |||||
ComputeResource string | |||||
BeginTimeUnix int64 | |||||
EndTimeUnix int64 | |||||
AiCenter string | |||||
NeedDeleteInfo string | |||||
Cluster string | |||||
RepoID int64 // include all repos if empty | |||||
UserID int64 | |||||
JobID string | |||||
SortType string | |||||
CloudbrainIDs []int64 | |||||
JobStatus []string | |||||
JobStatusNot bool | |||||
Keyword string | |||||
Type int | |||||
JobTypes []string | |||||
VersionName string | |||||
IsLatestVersion string | |||||
JobTypeNot bool | |||||
NeedRepoInfo bool | |||||
RepoIDList []int64 | |||||
BeginTime time.Time | |||||
EndTime time.Time | |||||
ComputeResource string | |||||
BeginTimeUnix int64 | |||||
EndTimeUnix int64 | |||||
AiCenter string | |||||
NeedDeleteInfo string | |||||
Cluster string | |||||
AccCardType string | |||||
AccCardsNum int | |||||
WorkServerNumber int | |||||
} | } | ||||
type TaskPod struct { | type TaskPod struct { | ||||
@@ -1563,7 +1566,8 @@ type CreateGrampusJobResponse struct { | |||||
type GetGrampusJobResponse struct { | type GetGrampusJobResponse struct { | ||||
GrampusResult | GrampusResult | ||||
JobInfo GrampusJobInfo `json:"otJob"` | |||||
JobInfo GrampusJobInfo `json:"otJob"` | |||||
ExitDiagnostics string `json:"exitDiagnostics"` | |||||
} | } | ||||
type GrampusNotebookResponse struct { | type GrampusNotebookResponse struct { | ||||
@@ -2435,18 +2439,44 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||||
) | ) | ||||
} | } | ||||
if opts.WorkServerNumber > 0 { | |||||
if opts.WorkServerNumber == 1 { | |||||
cond = cond.And(builder.Or( | |||||
builder.Eq{"cloudbrain.work_server_number": 0}, | |||||
builder.Eq{"cloudbrain.work_server_number": 1}, | |||||
builder.IsNull{"cloudbrain.work_server_number"}, | |||||
)) | |||||
} else { | |||||
cond = cond.And( | |||||
builder.Eq{"cloudbrain.work_server_number": opts.WorkServerNumber}, | |||||
) | |||||
} | |||||
} | |||||
if opts.AccCardType != "" { | |||||
cond = cond.And(builder.Eq{"cloudbrain_spec.acc_card_type": opts.AccCardType}) | |||||
} | |||||
if opts.AccCardsNum >= 0 { | |||||
cond = cond.And(builder.Eq{"cloudbrain_spec.acc_cards_num": opts.AccCardsNum}) | |||||
} | |||||
var count int64 | var count int64 | ||||
var err error | var err error | ||||
condition := "cloudbrain.user_id = `user`.id" | condition := "cloudbrain.user_id = `user`.id" | ||||
if len(opts.Keyword) == 0 { | if len(opts.Keyword) == 0 { | ||||
count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain)) | |||||
count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | |||||
Join("left", "`user`", condition). | |||||
Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). | |||||
Count(new(CloudbrainInfo)) | |||||
} else { | } else { | ||||
lowerKeyWord := strings.ToLower(opts.Keyword) | lowerKeyWord := strings.ToLower(opts.Keyword) | ||||
cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, | cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, | ||||
builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord})) | builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord})) | ||||
count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | ||||
Join("left", "`user`", condition).Count(new(CloudbrainInfo)) | |||||
Join("left", "`user`", condition). | |||||
Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). | |||||
Count(new(CloudbrainInfo)) | |||||
} | } | ||||
@@ -2468,6 +2498,7 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||||
cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) | cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) | ||||
if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond). | ||||
Join("left", "`user`", condition). | Join("left", "`user`", condition). | ||||
Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). | |||||
Find(&cloudbrains); err != nil { | Find(&cloudbrains); err != nil { | ||||
return nil, 0, fmt.Errorf("Find: %v", err) | return nil, 0, fmt.Errorf("Find: %v", err) | ||||
} | } | ||||
@@ -198,7 +198,6 @@ sendjob: | |||||
SetAuthToken(TOKEN). | SetAuthToken(TOKEN). | ||||
SetResult(&result). | SetResult(&result). | ||||
Get(HOST + urlTrainJob + "/" + jobID) | Get(HOST + urlTrainJob + "/" + jobID) | ||||
if err != nil { | if err != nil { | ||||
return nil, fmt.Errorf("resty GetJob: %v", err) | return nil, fmt.Errorf("resty GetJob: %v", err) | ||||
} | } | ||||
@@ -22,9 +22,9 @@ import ( | |||||
const ( | const ( | ||||
//notebook | //notebook | ||||
storageTypeOBS = "obs" | |||||
autoStopDuration = 4 * 60 * 60 | |||||
AutoStopDurationMs = 4 * 60 * 60 * 1000 | |||||
storageTypeOBS = "obs" | |||||
autoStopDuration = 4 * 60 * 60 | |||||
AutoStopDurationMs = 4 * 60 * 60 * 1000 | |||||
CodePath = "/code/" | CodePath = "/code/" | ||||
OutputPath = "/output/" | OutputPath = "/output/" | ||||
@@ -168,7 +168,6 @@ type OrgMultiNode struct { | |||||
Node []int `json:"node"` | Node []int `json:"node"` | ||||
} | } | ||||
type Parameters struct { | type Parameters struct { | ||||
Parameter []struct { | Parameter []struct { | ||||
Label string `json:"label"` | Label string `json:"label"` | ||||
@@ -645,7 +645,7 @@ func GetAllCloudbrainsPeriodDistribution(ctx *context.Context) { | |||||
} | } | ||||
} | } | ||||
ComputeResourceList := []string{"CPU/GPU", "NPU"} | |||||
ComputeResourceList := []string{"CPU/GPU", "NPU", "GCU"} | |||||
for _, v := range ComputeResourceList { | for _, v := range ComputeResourceList { | ||||
if _, ok := cloudBrainComputeResource[v]; !ok { | if _, ok := cloudBrainComputeResource[v]; !ok { | ||||
cloudBrainComputeResource[v] = 0 | cloudBrainComputeResource[v] = 0 | ||||
@@ -687,7 +687,6 @@ func GetCloudbrainsDetailData(ctx *context.Context) { | |||||
return | return | ||||
} | } | ||||
recordBeginTime := recordCloudbrain[0].Cloudbrain.CreatedUnix | recordBeginTime := recordCloudbrain[0].Cloudbrain.CreatedUnix | ||||
endTime := time.Now() | |||||
listType := ctx.Query("listType") | listType := ctx.Query("listType") | ||||
jobType := ctx.Query("jobType") | jobType := ctx.Query("jobType") | ||||
jobStatus := ctx.Query("jobStatus") | jobStatus := ctx.Query("jobStatus") | ||||
@@ -695,6 +694,33 @@ func GetCloudbrainsDetailData(ctx *context.Context) { | |||||
aiCenter := ctx.Query("aiCenter") | aiCenter := ctx.Query("aiCenter") | ||||
needDeleteInfo := ctx.Query("needDeleteInfo") | needDeleteInfo := ctx.Query("needDeleteInfo") | ||||
accCardType := ctx.Query("accCardType") | |||||
accCardsNum := ctx.QueryInt("accCardsNum") | |||||
workServerNumber := ctx.QueryInt("workServerNumber") | |||||
beginTimeStr := ctx.QueryTrim("beginTime") | |||||
endTimeStr := ctx.QueryTrim("endTime") | |||||
var beginTimeUnix int64 | |||||
var endTimeUnix int64 | |||||
if beginTimeStr == "" || endTimeStr == "" { | |||||
beginTimeUnix = int64(recordBeginTime) | |||||
endTimeUnix = time.Now().Unix() | |||||
} else { | |||||
beginTime, err := time.ParseInLocation("2006-01-02T15:04:05", beginTimeStr, time.Local) | |||||
if err != nil { | |||||
log.Error("Can not ParseInLocation.", err) | |||||
ctx.Error(http.StatusBadRequest, ctx.Tr("ParseInLocation_get_error")) | |||||
return | |||||
} | |||||
beginTimeUnix = beginTime.Unix() | |||||
endTime, err := time.ParseInLocation("2006-01-02T15:04:05", endTimeStr, time.Local) | |||||
if err != nil { | |||||
log.Error("Can not ParseInLocation.", err) | |||||
ctx.Error(http.StatusBadRequest, ctx.Tr("ParseInLocation_get_error")) | |||||
return | |||||
} | |||||
endTimeUnix = endTime.Unix() | |||||
} | |||||
if cloudBrainType == models.TypeCloudBrainOne && aiCenter == models.AICenterOfCloudBrainOne { | if cloudBrainType == models.TypeCloudBrainOne && aiCenter == models.AICenterOfCloudBrainOne { | ||||
aiCenter = "" | aiCenter = "" | ||||
} | } | ||||
@@ -753,18 +779,21 @@ func GetCloudbrainsDetailData(ctx *context.Context) { | |||||
Page: page, | Page: page, | ||||
PageSize: pageSize, | PageSize: pageSize, | ||||
}, | }, | ||||
Keyword: keyword, | |||||
Type: cloudBrainType, | |||||
ComputeResource: listType, | |||||
JobTypeNot: jobTypeNot, | |||||
JobStatusNot: jobStatusNot, | |||||
JobStatus: jobStatuses, | |||||
JobTypes: jobTypes, | |||||
NeedRepoInfo: true, | |||||
BeginTimeUnix: int64(recordBeginTime), | |||||
EndTimeUnix: endTime.Unix(), | |||||
AiCenter: aiCenter, | |||||
NeedDeleteInfo: needDeleteInfo, | |||||
Keyword: keyword, | |||||
Type: cloudBrainType, | |||||
ComputeResource: listType, | |||||
JobTypeNot: jobTypeNot, | |||||
JobStatusNot: jobStatusNot, | |||||
JobStatus: jobStatuses, | |||||
JobTypes: jobTypes, | |||||
NeedRepoInfo: true, | |||||
BeginTimeUnix: beginTimeUnix, | |||||
EndTimeUnix: endTimeUnix, | |||||
AiCenter: aiCenter, | |||||
NeedDeleteInfo: needDeleteInfo, | |||||
AccCardType: accCardType, | |||||
AccCardsNum: accCardsNum, | |||||
WorkServerNumber: workServerNumber, | |||||
}) | }) | ||||
if err != nil { | if err != nil { | ||||
ctx.ServerError("Get job failed:", err) | ctx.ServerError("Get job failed:", err) | ||||
@@ -1039,7 +1068,7 @@ func getCloudbrainCount(beginTime time.Time, endTime time.Time, cloudbrains []*m | |||||
} | } | ||||
} | } | ||||
ComputeResourceList := []string{"CPU/GPU", "NPU"} | |||||
ComputeResourceList := []string{"CPU/GPU", "NPU", "GCU"} | |||||
for _, v := range ComputeResourceList { | for _, v := range ComputeResourceList { | ||||
if _, ok := cloudBrainComputeResource[v]; !ok { | if _, ok := cloudBrainComputeResource[v]; !ok { | ||||
cloudBrainComputeResource[v] = 0 | cloudBrainComputeResource[v] = 0 | ||||
@@ -1359,6 +1359,23 @@ func GrampusGetLog(ctx *context.Context) { | |||||
}) | }) | ||||
return | return | ||||
} | } | ||||
result, err := grampus.GetJob(jobID) | |||||
if err != nil { | |||||
log.Error("GetJob(%s) failed:%v", job.JobName, err) | |||||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||||
"JobName": job.JobName, | |||||
"Content": content, | |||||
"CanLogDownload": false, | |||||
}) | |||||
return | |||||
} | |||||
if result != nil { | |||||
job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||||
if job.Status == models.GrampusStatusFailed { | |||||
content = content + "\n" + result.ExitDiagnostics | |||||
} | |||||
} | |||||
canLogDownload := err == nil && job.IsUserHasRight(ctx.User) | canLogDownload := err == nil && job.IsUserHasRight(ctx.User) | ||||
ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
"JobName": job.JobName, | "JobName": job.JobName, | ||||