Browse Source

Merge pull request 'GPU智算网络' (#2386) from grampus-specialpool into V20220630

Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/2386
Reviewed-by: ychao_1983 <ychao_1983@sina.com>
pull/2389/head
lewis 3 years ago
parent
commit
5273e75acd
6 changed files with 30 additions and 22 deletions
  1. +6
    -4
      modules/grampus/grampus.go
  2. +8
    -2
      public/home/home.js
  3. +2
    -7
      routers/admin/cloudbrains.go
  4. +10
    -5
      routers/repo/grampus.go
  5. +1
    -1
      services/socketwrap/clientManager.go
  6. +3
    -3
      templates/admin/cloudbrain/list.tmpl

+ 6
- 4
modules/grampus/grampus.go View File

@@ -19,12 +19,14 @@ const (
ProcessorTypeNPU = "npu.huawei.com/NPU"
ProcessorTypeGPU = "nvidia.com/gpu"

CommandPrepareScript = "pwd;cd /cache;mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" +
GpuWorkDir = "/tmp/"
NpuWorkDir = "/cache/"

CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" +
"echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;"
//CommandPrepareScript = "pwd;cd /cache;mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" +
// "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;"

//CommandPrepareScript = "bash;pwd;apt-get -y update;apt-get -y upgrade;apt-get -y install wget;apt-get -y install unzip;" +
// "cd /tmp;mkdir -p output;mkdir -p code;mkdir -p dataset;wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" +
// "unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;"
CodeArchiveName = "master.zip"
)



+ 8
- 2
public/home/home.js View File

@@ -156,7 +156,7 @@ document.onreadystatechange = function () {
html += recordPrefix + actionName;
html += " <a href=\"" + getRepoLink(record) + "\" rel=\"nofollow\">" + getRepotext(record) + "</a>"
}
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" || record.OpType == "31"){
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){
html += recordPrefix + actionName;
html += " <a href=\"" + getTaskLink(record) + "\" rel=\"nofollow\">" + record.RefName + "</a>"
}
@@ -201,6 +201,8 @@ function getTaskLink(record){
re = re + "/modelmanage/show_model_info?name=" + record.RefName;
}else if(record.OpType == 31){
re = re + "/cloudbrain/train-job/" + record.Content;
}else if(record.OpType == 32 || record.OpType == 33){
re = re + "/grampus/train-job/" + record.Content;
}
re = encodeURI(re);
return re;
@@ -374,7 +376,9 @@ var actionNameZH={
"28":"创建了推理任务",
"29":"创建了评测任务",
"30":"导入了新模型",
"31":"创建了CPU/GPU类型训练任务"
"31":"创建了CPU/GPU类型训练任务",
"32":"创建了NPU类型训练任务",
"33":"创建了CPU/GPU类型训练任务"
};

var actionNameEN={
@@ -401,6 +405,8 @@ var actionNameEN={
"29":" created profiling task",
"30":" created new model",
"31":" created CPU/GPU type training task",
"32":" created NPU type training task",
"33":" created CPU/GPU type training task"
};

var repoAndOrgZH={


+ 2
- 7
routers/admin/cloudbrains.go View File

@@ -43,12 +43,6 @@ func CloudBrains(ctx *context.Context) {
if page <= 0 {
page = 1
}
debugType := models.TypeCloudBrainAll
if listType == models.GPUResource {
debugType = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
debugType = models.TypeCloudBrainTwo
}

var jobTypes []string
jobTypeNot := false
@@ -77,13 +71,14 @@ func CloudBrains(ctx *context.Context) {
PageSize: setting.UI.IssuePagingNum,
},
Keyword: keyword,
Type: debugType,
JobTypeNot: jobTypeNot,
JobStatusNot: jobStatusNot,
JobStatus: jobStatuses,
JobTypes: jobTypes,
NeedRepoInfo: true,
IsLatestVersion: modelarts.IsLatestVersion,
ComputeResource: listType,
Type: models.TypeCloudBrainAll,
})
if err != nil {
ctx.ServerError("Get job failed:", err)


+ 10
- 5
routers/repo/grampus.go View File

@@ -664,7 +664,12 @@ func GrampusGetLog(ctx *context.Context) {
func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName string) (string, error) {
var command string

command += grampus.CommandPrepareScript
workDir := grampus.NpuWorkDir
if processorType == grampus.ProcessorTypeGPU {
workDir = grampus.GpuWorkDir
}

command += "pwd;cd " + workDir + grampus.CommandPrepareScript
//download code & dataset
if processorType == grampus.ProcessorTypeNPU {
commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " " + datasetName + ";"
@@ -683,7 +688,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
if strings.HasSuffix(datasetName, ".tar.gz") {
toolUnzip = "tar -zxvf "
}
commandUnzip := "cd /cache/code;unzip -q master.zip;echo \"start to unzip dataset\";cd /cache/dataset;" + toolUnzip + datasetName + ";"
commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + ";"
command += commandUnzip

//check unzip result
@@ -712,7 +717,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
}
}

commandCode := "cd /cache/code/" + strings.ToLower(repoName) + ";python " + bootFile + paramCode + ";"
commandCode := "cd " + workDir + "code/" + strings.ToLower(repoName) + ";python " + bootFile + paramCode + ";"
command += commandCode

//get exec result
@@ -721,10 +726,10 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo

//upload models
if processorType == grampus.ProcessorTypeNPU {
commandUpload := "cd /cache/script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + "/cache/output/;"
commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + workDir + "output/;"
command += commandUpload
} else if processorType == grampus.ProcessorTypeGPU {
commandUpload := "cd /cache/script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + "/cache/output/;"
commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + workDir + "output/;"
command += commandUpload
}



+ 1
- 1
services/socketwrap/clientManager.go View File

@@ -10,7 +10,7 @@ import (
"github.com/elliotchance/orderedmap"
)

var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}
var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}

type ClientsManager struct {
Clients *orderedmap.OrderedMap


+ 3
- 3
templates/admin/cloudbrain/list.tmpl View File

@@ -102,7 +102,7 @@
</a>
{{else if eq .JobType "TRAIN"}}
<a class="title"
href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts{{end}}/train-job/{{$JobID}}"
href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 0}}/cloudbrain{{else if eq .Cloudbrain.Type 1}}/modelarts{{else if eq .Cloudbrain.Type 2}}/grampus{{end}}/train-job/{{$JobID}}"
title="{{.DisplayJobName}}" style="font-size: 14px;">
<span class="fitted"
style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span>
@@ -204,7 +204,7 @@
{{else}}
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{$JobID}}"
class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button"
data-repopath="{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain/train-job{{else}}/modelarts/{{if eq .JobType "INFERENCE"}}inference-job{{else}}train-job{{end}}{{end}}"
data-repopath="{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{else}}/modelarts/{{if eq .JobType "INFERENCE"}}inference-job{{else}}train-job{{end}}{{end}}"
data-jobid="{{$JobID}}" data-version="{{.VersionName}}">
{{$.i18n.Tr "repo.stop"}}
</a>
@@ -212,7 +212,7 @@
</div>
<!-- 删除任务 -->
<form class="ui compact buttons" id="delForm-{{$JobID}}"
action='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "BENCHMARK"}}/cloudbrain/benchmark{{else if or (eq .JobType "SNN4IMAGENET") (eq .JobType "BRAINSCORE")}}/cloudbrain{{else if eq .JobType "DEBUG"}}{{if eq .ComputeResource "NPU"}}/modelarts/notebook{{else}}/cloudbrain{{end}}{{else if eq .JobType "TRAIN"}}{{if eq .ComputeResource "NPU"}}/modelarts/notebook{{else}}/cloudbrain{{end}}/train-job{{end}}/{{$JobID}}/del?isadminpage=true'
action='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "BENCHMARK"}}/cloudbrain/benchmark{{else if or (eq .JobType "SNN4IMAGENET") (eq .JobType "BRAINSCORE")}}/cloudbrain{{else if eq .JobType "DEBUG"}}{{if eq .ComputeResource "NPU"}}/modelarts/notebook{{else}}/cloudbrain{{end}}{{else if eq .JobType "TRAIN"}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}{{end}}/{{$JobID}}/del?isadminpage=true'
method="post">
{{$.CsrfTokenHtml}}
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{$JobID}}"


Loading…
Cancel
Save