Browse Source

Merge branch 'benchmark' of https://git.openi.org.cn/OpenI/aiforge into benchmark

pull/1405/head
zouap 3 years ago
parent
commit
8c0fc2494b
5 changed files with 135 additions and 32 deletions
  1. +28
    -10
      models/cloudbrain.go
  2. +2
    -1
      modules/cloudbrain/cloudbrain.go
  3. +68
    -4
      modules/cloudbrain/resty.go
  4. +28
    -8
      routers/api/v1/repo/cloudbrain.go
  5. +9
    -9
      routers/repo/cloudbrain.go

+ 28
- 10
models/cloudbrain.go View File

@@ -403,6 +403,8 @@ type BenchmarkDataset struct {
Id int `json:"id"`
Value string `json:"value"` //二级算法类型名称
Attachment string `json:"attachment"` //数据集的uuid
Owner string `json:"owner"` //评估脚本所在仓库的拥有者
RepoName string `json:"repo_name"` //评估脚本所在仓库的名称
}

type GpuInfos struct {
@@ -476,7 +478,7 @@ type MatchInfo struct {

type GetJobLogResult struct {
ScrollID string `json:"_scroll_id"`
Took int `json:"took"`
Took int `json:"took"`
TimedOut bool `json:"timed_out"`
Shards struct {
Total int `json:"total"`
@@ -485,18 +487,34 @@ type GetJobLogResult struct {
Failed int `json:"failed"`
} `json:"_shards"`
Hits struct {
Hits []struct {
Index string `json:"_index"`
Type string `json:"_type"`
ID string `json:"_id"`
Source struct {
Message string `json:"message"`
} `json:"_source"`
Sort []int `json:"sort"`
} `json:"hits"`
Hits []Hits `json:"hits"`
} `json:"hits"`
}

type Hits struct {
Index string `json:"_index"`
Type string `json:"_type"`
ID string `json:"_id"`
Source struct {
Message string `json:"message"`
} `json:"_source"`
Sort []int `json:"sort"`
}

type GetAllJobLogParams struct {
Scroll string `json:"scroll"`
ScrollID string `json:"scroll_id"`
}

type DeleteJobLogTokenParams struct {
ScrollID string `json:"scroll_id"`
}

type DeleteJobLogTokenResult struct {
Succeeded bool `json:"succeeded"`
NumFreed int `json:"num_freed"`
}

type CloudBrainResult struct {
Code string `json:"code"`
Msg string `json:"msg"`


+ 2
- 1
modules/cloudbrain/cloudbrain.go View File

@@ -17,7 +17,8 @@ const (
Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;
service ssh stop;
jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
CodeMountPath = "/code"
DataSetMountPath = "/dataset"
ModelMountPath = "/model"


+ 68
- 4
modules/cloudbrain/resty.go View File

@@ -26,6 +26,8 @@ const (
JobHasBeenStopped = "S410"
Public = "public"
Custom = "custom"
LogPageSize = 500
LogPageTokenExpired = "5m"
)

func getRestyClient() *resty.Client {
@@ -279,7 +281,7 @@ func GetJobLog(jobID string) (*models.GetJobLogResult, error) {
client := getRestyClient()
var result models.GetJobLogResult
req := models.GetJobLogParams{
Size: "5000",
Size: strconv.Itoa(LogPageSize),
Sort: "log.offset",
QueryInfo: models.QueryInfo{
MatchInfo: models.MatchInfo{
@@ -293,17 +295,79 @@ func GetJobLog(jobID string) (*models.GetJobLogResult, error) {
SetAuthToken(TOKEN).
SetBody(req).
SetResult(&result).
Post(HOST + "es/_search?_source=message&scroll=5m")
Post(HOST + "es/_search?_source=message&scroll=" + LogPageTokenExpired)

if err != nil {
log.Info("GetJobLog failed: %v", err)
log.Error("GetJobLog failed: %v", err)
return &result, fmt.Errorf("resty GetJobLog: %v, %s", err, res.String())
}

if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) {
log.Info("res.Status(): %s, response: %s", res.Status(), res.String())
log.Error("res.Status(): %s, response: %s", res.Status(), res.String())
return &result, errors.New(res.String())
}

return &result, nil
}

func GetJobAllLog(scrollID string) (*models.GetJobLogResult, error) {
checkSetting()
client := getRestyClient()
var result models.GetJobLogResult
req := models.GetAllJobLogParams{
Scroll: LogPageTokenExpired,
ScrollID: scrollID,
}

res, err := client.R().
SetHeader("Content-Type", "application/json").
SetAuthToken(TOKEN).
SetBody(req).
SetResult(&result).
Post(HOST + "es/_search/scroll")

if err != nil {
log.Error("GetJobAllLog failed: %v", err)
return &result, fmt.Errorf("resty GetJobAllLog: %v, %s", err, res.String())
}

if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) {
log.Error("res.Status(): %s, response: %s", res.Status(), res.String())
return &result, errors.New(res.String())
}

return &result, nil
}

func DeleteJobLogToken(scrollID string) (error) {
checkSetting()
client := getRestyClient()
var result models.DeleteJobLogTokenResult
req := models.DeleteJobLogTokenParams{
ScrollID: scrollID,
}

res, err := client.R().
SetHeader("Content-Type", "application/json").
SetAuthToken(TOKEN).
SetBody(req).
SetResult(&result).
Delete(HOST + "es/_search/scroll")

if err != nil {
log.Error("DeleteJobLogToken failed: %v", err)
return fmt.Errorf("resty DeleteJobLogToken: %v, %s", err, res.String())
}

if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) {
log.Error("res.Status(): %s, response: %s", res.Status(), res.String())
return errors.New(res.String())
}

if !result.Succeeded {
log.Error("DeleteJobLogToken failed")
return errors.New("DeleteJobLogToken failed")
}

return nil
}

+ 28
- 8
routers/api/v1/repo/cloudbrain.go View File

@@ -102,25 +102,45 @@ func CloudbrainGetLog(ctx *context.Context) {
return
}

var hits []models.Hits
result, err := cloudbrain.GetJobLog(jobID)
if err != nil{
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
hits = result.Hits.Hits

//if the size equal page_size, then take the scroll_id to get all log and delete the scroll_id(the num of scroll_id is limited)
if len(result.Hits.Hits) >= cloudbrain.LogPageSize {
for {
resultNext, err := cloudbrain.GetJobAllLog(result.ScrollID)
if err != nil{
log.Error("GetJobAllLog failed: %v", err, ctx.Data["MsgID"])
} else {
for _, hit := range resultNext.Hits.Hits {
hits = append(hits, hit)
}
}

if len(resultNext.Hits.Hits) < cloudbrain.LogPageSize {
log.Info("get all log already")
break
}
}
}

sort.Slice(result.Hits.Hits, func(i, j int) bool {
return result.Hits.Hits[i].Sort[0] < result.Hits.Hits[j].Sort[0]
cloudbrain.DeleteJobLogToken(result.ScrollID)

sort.Slice(hits, func(i, j int) bool {
return hits[i].Sort[0] < hits[j].Sort[0]
})

log.Info("%v", result.Hits.Hits)
var content []string
for _, log := range result.Hits.Hits {
content = append(content, log.Source.Message + "\n")
var content string
for _, log := range hits {
content += log.Source.Message + "\n"
}

log.Info("%v", content)

ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,
"Content": content,


+ 9
- 9
routers/repo/cloudbrain.go View File

@@ -1062,12 +1062,12 @@ func CloudBrainBenchmarkNew(ctx *context.Context) {
ctx.HTML(200, tplCloudBrainBenchmarkNew)
}

func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string, error) {
uuid := ""
func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (*models.BenchmarkDataset, error) {
var childInfo *models.BenchmarkDataset
if benchmarkTypes == nil {
if err := json.Unmarshal([]byte(setting.BenchmarkTypes), &benchmarkTypes); err != nil {
log.Error("json.Unmarshal BenchmarkTypes(%s) failed:%v", setting.BenchmarkTypes, err)
return uuid, err
return childInfo, err
}
}

@@ -1076,7 +1076,7 @@ func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string,
if benchmarkType.Id == benchmarkTypeID {
for _, childType := range benchmarkType.Second {
if childType.Id == benchmarkChildTypeID {
uuid = childType.Attachment
childInfo = childType
isExist = true
break
}
@@ -1087,10 +1087,10 @@ func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string,

if !isExist {
log.Error("no such benchmark_type_id&benchmark_child_type_id")
return uuid, errors.New("no such benchmark_type_id&benchmark_child_type_id")
return childInfo, errors.New("no such benchmark_type_id&benchmark_child_type_id")
}

return uuid, nil
return childInfo, nil
}

func getBenchmarkGpuQueue(gpuQueue string) (string, error) {
@@ -1161,7 +1161,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF
return
}

uuid, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID)
childInfo, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID)
if err != nil {
log.Error("getBenchmarkAttachment failed:%v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
@@ -1240,7 +1240,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF
}
}

if err := downloadRateCode(repo, jobName, setting.BenchmarkOwner, setting.BenchmarkName, benchmarkPath, form.BenchmarkCategory, gpuType); err != nil {
if err := downloadRateCode(repo, jobName, childInfo.Owner, childInfo.RepoName, benchmarkPath, form.BenchmarkCategory, gpuType); err != nil {
log.Error("downloadRateCode failed, %v", err, ctx.Data["MsgID"])
//cloudBrainNewDataPrepare(ctx)
//ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form)
@@ -1254,7 +1254,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF
//return
}

err = cloudbrain.GenerateTask(ctx, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
err = cloudbrain.GenerateTask(ctx, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description,


Loading…
Cancel
Save