@@ -403,6 +403,8 @@ type BenchmarkDataset struct { | |||||
Id int `json:"id"` | Id int `json:"id"` | ||||
Value string `json:"value"` //二级算法类型名称 | Value string `json:"value"` //二级算法类型名称 | ||||
Attachment string `json:"attachment"` //数据集的uuid | Attachment string `json:"attachment"` //数据集的uuid | ||||
Owner string `json:"owner"` //评估脚本所在仓库的拥有者 | |||||
RepoName string `json:"repo_name"` //评估脚本所在仓库的名称 | |||||
} | } | ||||
type GpuInfos struct { | type GpuInfos struct { | ||||
@@ -476,7 +478,7 @@ type MatchInfo struct { | |||||
type GetJobLogResult struct { | type GetJobLogResult struct { | ||||
ScrollID string `json:"_scroll_id"` | ScrollID string `json:"_scroll_id"` | ||||
Took int `json:"took"` | |||||
Took int `json:"took"` | |||||
TimedOut bool `json:"timed_out"` | TimedOut bool `json:"timed_out"` | ||||
Shards struct { | Shards struct { | ||||
Total int `json:"total"` | Total int `json:"total"` | ||||
@@ -485,18 +487,34 @@ type GetJobLogResult struct { | |||||
Failed int `json:"failed"` | Failed int `json:"failed"` | ||||
} `json:"_shards"` | } `json:"_shards"` | ||||
Hits struct { | Hits struct { | ||||
Hits []struct { | |||||
Index string `json:"_index"` | |||||
Type string `json:"_type"` | |||||
ID string `json:"_id"` | |||||
Source struct { | |||||
Message string `json:"message"` | |||||
} `json:"_source"` | |||||
Sort []int `json:"sort"` | |||||
} `json:"hits"` | |||||
Hits []Hits `json:"hits"` | |||||
} `json:"hits"` | } `json:"hits"` | ||||
} | } | ||||
// Hits is a single entry of the "hits.hits" array of a job-log search
// response; GetJobLogResult.Hits.Hits is a slice of this type.
//
// Despite the plural name, one value describes exactly one hit.
type Hits struct {
	Index string `json:"_index"`
	Type  string `json:"_type"`
	ID    string `json:"_id"`

	// Source holds the returned document body; only its "message" field is
	// decoded.
	Source struct {
		Message string `json:"message"`
	} `json:"_source"`

	// Sort holds the sort values reported for this hit. Callers order log
	// lines by the first element.
	Sort []int `json:"sort"`
}
// GetAllJobLogParams is the JSON request body that GetJobAllLog posts to the
// "es/_search/scroll" endpoint to fetch the next page of a scrolled search.
type GetAllJobLogParams struct {
	// Scroll is the keep-alive value sent with the request, e.g. "5m".
	Scroll string `json:"scroll"`
	// ScrollID identifies the scroll context returned by a previous search.
	ScrollID string `json:"scroll_id"`
}
// DeleteJobLogTokenParams is the JSON request body that DeleteJobLogToken
// sends with its DELETE request to the "es/_search/scroll" endpoint.
type DeleteJobLogTokenParams struct {
	// ScrollID identifies the scroll context to release.
	ScrollID string `json:"scroll_id"`
}
// DeleteJobLogTokenResult is the JSON response decoded by DeleteJobLogToken
// after asking the log backend to release a scroll context.
type DeleteJobLogTokenResult struct {
	// Succeeded is checked by DeleteJobLogToken; false is reported as an error.
	Succeeded bool `json:"succeeded"`
	// NumFreed is decoded from "num_freed".
	// NOTE(review): presumably the number of scroll contexts released - confirm
	// against the backend documentation.
	NumFreed int `json:"num_freed"`
}
type CloudBrainResult struct { | type CloudBrainResult struct { | ||||
Code string `json:"code"` | Code string `json:"code"` | ||||
Msg string `json:"msg"` | Msg string `json:"msg"` | ||||
@@ -17,7 +17,8 @@ const ( | |||||
Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple; | Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple; | ||||
service ssh stop; | service ssh stop; | ||||
jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` | jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` | ||||
CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` | |||||
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` | |||||
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` | |||||
CodeMountPath = "/code" | CodeMountPath = "/code" | ||||
DataSetMountPath = "/dataset" | DataSetMountPath = "/dataset" | ||||
ModelMountPath = "/model" | ModelMountPath = "/model" | ||||
@@ -26,6 +26,8 @@ const ( | |||||
JobHasBeenStopped = "S410" | JobHasBeenStopped = "S410" | ||||
Public = "public" | Public = "public" | ||||
Custom = "custom" | Custom = "custom" | ||||
LogPageSize = 500 | |||||
LogPageTokenExpired = "5m" | |||||
) | ) | ||||
func getRestyClient() *resty.Client { | func getRestyClient() *resty.Client { | ||||
@@ -279,7 +281,7 @@ func GetJobLog(jobID string) (*models.GetJobLogResult, error) { | |||||
client := getRestyClient() | client := getRestyClient() | ||||
var result models.GetJobLogResult | var result models.GetJobLogResult | ||||
req := models.GetJobLogParams{ | req := models.GetJobLogParams{ | ||||
Size: "5000", | |||||
Size: strconv.Itoa(LogPageSize), | |||||
Sort: "log.offset", | Sort: "log.offset", | ||||
QueryInfo: models.QueryInfo{ | QueryInfo: models.QueryInfo{ | ||||
MatchInfo: models.MatchInfo{ | MatchInfo: models.MatchInfo{ | ||||
@@ -293,17 +295,79 @@ func GetJobLog(jobID string) (*models.GetJobLogResult, error) { | |||||
SetAuthToken(TOKEN). | SetAuthToken(TOKEN). | ||||
SetBody(req). | SetBody(req). | ||||
SetResult(&result). | SetResult(&result). | ||||
Post(HOST + "es/_search?_source=message&scroll=5m") | |||||
Post(HOST + "es/_search?_source=message&scroll=" + LogPageTokenExpired) | |||||
if err != nil { | if err != nil { | ||||
log.Info("GetJobLog failed: %v", err) | |||||
log.Error("GetJobLog failed: %v", err) | |||||
return &result, fmt.Errorf("resty GetJobLog: %v, %s", err, res.String()) | return &result, fmt.Errorf("resty GetJobLog: %v, %s", err, res.String()) | ||||
} | } | ||||
if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) { | if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) { | ||||
log.Info("res.Status(): %s, response: %s", res.Status(), res.String()) | |||||
log.Error("res.Status(): %s, response: %s", res.Status(), res.String()) | |||||
return &result, errors.New(res.String()) | return &result, errors.New(res.String()) | ||||
} | } | ||||
return &result, nil | return &result, nil | ||||
} | } | ||||
func GetJobAllLog(scrollID string) (*models.GetJobLogResult, error) { | |||||
checkSetting() | |||||
client := getRestyClient() | |||||
var result models.GetJobLogResult | |||||
req := models.GetAllJobLogParams{ | |||||
Scroll: LogPageTokenExpired, | |||||
ScrollID: scrollID, | |||||
} | |||||
res, err := client.R(). | |||||
SetHeader("Content-Type", "application/json"). | |||||
SetAuthToken(TOKEN). | |||||
SetBody(req). | |||||
SetResult(&result). | |||||
Post(HOST + "es/_search/scroll") | |||||
if err != nil { | |||||
log.Error("GetJobAllLog failed: %v", err) | |||||
return &result, fmt.Errorf("resty GetJobAllLog: %v, %s", err, res.String()) | |||||
} | |||||
if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) { | |||||
log.Error("res.Status(): %s, response: %s", res.Status(), res.String()) | |||||
return &result, errors.New(res.String()) | |||||
} | |||||
return &result, nil | |||||
} | |||||
func DeleteJobLogToken(scrollID string) (error) { | |||||
checkSetting() | |||||
client := getRestyClient() | |||||
var result models.DeleteJobLogTokenResult | |||||
req := models.DeleteJobLogTokenParams{ | |||||
ScrollID: scrollID, | |||||
} | |||||
res, err := client.R(). | |||||
SetHeader("Content-Type", "application/json"). | |||||
SetAuthToken(TOKEN). | |||||
SetBody(req). | |||||
SetResult(&result). | |||||
Delete(HOST + "es/_search/scroll") | |||||
if err != nil { | |||||
log.Error("DeleteJobLogToken failed: %v", err) | |||||
return fmt.Errorf("resty DeleteJobLogToken: %v, %s", err, res.String()) | |||||
} | |||||
if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) { | |||||
log.Error("res.Status(): %s, response: %s", res.Status(), res.String()) | |||||
return errors.New(res.String()) | |||||
} | |||||
if !result.Succeeded { | |||||
log.Error("DeleteJobLogToken failed") | |||||
return errors.New("DeleteJobLogToken failed") | |||||
} | |||||
return nil | |||||
} |
@@ -102,25 +102,45 @@ func CloudbrainGetLog(ctx *context.Context) { | |||||
return | return | ||||
} | } | ||||
var hits []models.Hits | |||||
result, err := cloudbrain.GetJobLog(jobID) | result, err := cloudbrain.GetJobLog(jobID) | ||||
if err != nil{ | if err != nil{ | ||||
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"]) | log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"]) | ||||
ctx.ServerError(err.Error(), err) | ctx.ServerError(err.Error(), err) | ||||
return | return | ||||
} | } | ||||
hits = result.Hits.Hits | |||||
//if the size equal page_size, then take the scroll_id to get all log and delete the scroll_id(the num of scroll_id is limited) | |||||
if len(result.Hits.Hits) >= cloudbrain.LogPageSize { | |||||
for { | |||||
resultNext, err := cloudbrain.GetJobAllLog(result.ScrollID) | |||||
if err != nil{ | |||||
log.Error("GetJobAllLog failed: %v", err, ctx.Data["MsgID"]) | |||||
} else { | |||||
for _, hit := range resultNext.Hits.Hits { | |||||
hits = append(hits, hit) | |||||
} | |||||
} | |||||
if len(resultNext.Hits.Hits) < cloudbrain.LogPageSize { | |||||
log.Info("get all log already") | |||||
break | |||||
} | |||||
} | |||||
} | |||||
sort.Slice(result.Hits.Hits, func(i, j int) bool { | |||||
return result.Hits.Hits[i].Sort[0] < result.Hits.Hits[j].Sort[0] | |||||
cloudbrain.DeleteJobLogToken(result.ScrollID) | |||||
sort.Slice(hits, func(i, j int) bool { | |||||
return hits[i].Sort[0] < hits[j].Sort[0] | |||||
}) | }) | ||||
log.Info("%v", result.Hits.Hits) | |||||
var content []string | |||||
for _, log := range result.Hits.Hits { | |||||
content = append(content, log.Source.Message + "\n") | |||||
var content string | |||||
for _, log := range hits { | |||||
content += log.Source.Message + "\n" | |||||
} | } | ||||
log.Info("%v", content) | |||||
ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
"JobID": jobID, | "JobID": jobID, | ||||
"Content": content, | "Content": content, | ||||
@@ -1062,12 +1062,12 @@ func CloudBrainBenchmarkNew(ctx *context.Context) { | |||||
ctx.HTML(200, tplCloudBrainBenchmarkNew) | ctx.HTML(200, tplCloudBrainBenchmarkNew) | ||||
} | } | ||||
func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string, error) { | |||||
uuid := "" | |||||
func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (*models.BenchmarkDataset, error) { | |||||
var childInfo *models.BenchmarkDataset | |||||
if benchmarkTypes == nil { | if benchmarkTypes == nil { | ||||
if err := json.Unmarshal([]byte(setting.BenchmarkTypes), &benchmarkTypes); err != nil { | if err := json.Unmarshal([]byte(setting.BenchmarkTypes), &benchmarkTypes); err != nil { | ||||
log.Error("json.Unmarshal BenchmarkTypes(%s) failed:%v", setting.BenchmarkTypes, err) | log.Error("json.Unmarshal BenchmarkTypes(%s) failed:%v", setting.BenchmarkTypes, err) | ||||
return uuid, err | |||||
return childInfo, err | |||||
} | } | ||||
} | } | ||||
@@ -1076,7 +1076,7 @@ func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string, | |||||
if benchmarkType.Id == benchmarkTypeID { | if benchmarkType.Id == benchmarkTypeID { | ||||
for _, childType := range benchmarkType.Second { | for _, childType := range benchmarkType.Second { | ||||
if childType.Id == benchmarkChildTypeID { | if childType.Id == benchmarkChildTypeID { | ||||
uuid = childType.Attachment | |||||
childInfo = childType | |||||
isExist = true | isExist = true | ||||
break | break | ||||
} | } | ||||
@@ -1087,10 +1087,10 @@ func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string, | |||||
if !isExist { | if !isExist { | ||||
log.Error("no such benchmark_type_id&benchmark_child_type_id") | log.Error("no such benchmark_type_id&benchmark_child_type_id") | ||||
return uuid, errors.New("no such benchmark_type_id&benchmark_child_type_id") | |||||
return childInfo, errors.New("no such benchmark_type_id&benchmark_child_type_id") | |||||
} | } | ||||
return uuid, nil | |||||
return childInfo, nil | |||||
} | } | ||||
func getBenchmarkGpuQueue(gpuQueue string) (string, error) { | func getBenchmarkGpuQueue(gpuQueue string) (string, error) { | ||||
@@ -1161,7 +1161,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF | |||||
return | return | ||||
} | } | ||||
uuid, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID) | |||||
childInfo, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID) | |||||
if err != nil { | if err != nil { | ||||
log.Error("getBenchmarkAttachment failed:%v", err, ctx.Data["MsgID"]) | log.Error("getBenchmarkAttachment failed:%v", err, ctx.Data["MsgID"]) | ||||
cloudBrainNewDataPrepare(ctx) | cloudBrainNewDataPrepare(ctx) | ||||
@@ -1240,7 +1240,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF | |||||
} | } | ||||
} | } | ||||
if err := downloadRateCode(repo, jobName, setting.BenchmarkOwner, setting.BenchmarkName, benchmarkPath, form.BenchmarkCategory, gpuType); err != nil { | |||||
if err := downloadRateCode(repo, jobName, childInfo.Owner, childInfo.RepoName, benchmarkPath, form.BenchmarkCategory, gpuType); err != nil { | |||||
log.Error("downloadRateCode failed, %v", err, ctx.Data["MsgID"]) | log.Error("downloadRateCode failed, %v", err, ctx.Data["MsgID"]) | ||||
//cloudBrainNewDataPrepare(ctx) | //cloudBrainNewDataPrepare(ctx) | ||||
//ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | //ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | ||||
@@ -1254,7 +1254,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF | |||||
//return | //return | ||||
} | } | ||||
err = cloudbrain.GenerateTask(ctx, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||||
err = cloudbrain.GenerateTask(ctx, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||||
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | ||||
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | ||||
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, | storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, | ||||