Browse Source

Merge branch 'zouap' into fix-1653

pull/1830/head
wangjr 3 years ago
parent
commit
9ce3d46b32
15 changed files with 207 additions and 87 deletions
  1. +22
    -2
      models/cloudbrain.go
  2. +42
    -25
      public/home/search.js
  3. +3
    -12
      routers/api/v1/repo/cloudbrain.go
  4. +27
    -34
      routers/repo/cloudbrain.go
  5. +13
    -1
      templates/explore/repos.tmpl
  6. +13
    -1
      templates/repo/cloudbrain/benchmark/index.tmpl
  7. +14
    -2
      templates/repo/cloudbrain/benchmark/show.tmpl
  8. +10
    -2
      templates/repo/cloudbrain/show.tmpl
  9. +1
    -1
      templates/repo/cloudbrain/trainjob/new.tmpl
  10. +1
    -1
      templates/repo/cloudbrain/trainjob/show.tmpl
  11. +14
    -1
      templates/repo/debugjob/index.tmpl
  12. +14
    -1
      templates/repo/modelarts/inferencejob/index.tmpl
  13. +7
    -1
      templates/repo/modelarts/inferencejob/show.tmpl
  14. +14
    -1
      templates/repo/modelarts/trainjob/index.tmpl
  15. +12
    -2
      templates/repo/modelarts/trainjob/show.tmpl

+ 22
- 2
models/cloudbrain.go View File

@@ -170,7 +170,9 @@ func (task *Cloudbrain) ComputeAndSetDuration() {
if task.StartTime == 0 {
d = 0
} else if task.EndTime == 0 {
d = time.Now().Unix() - task.StartTime.AsTime().Unix()
if !task.IsTerminal() {
d = time.Now().Unix() - task.StartTime.AsTime().Unix()
}
} else {
d = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix()
}
@@ -182,6 +184,11 @@ func (task *Cloudbrain) ComputeAndSetDuration() {
task.TrainJobDuration = ConvertDurationToStr(d)
}

// IsTerminal reports whether the task's Status matches one of the finished
// states enumerated below (completed, failed, killed, stopped or succeeded,
// across the ModelArts and Job status constants).
func (task *Cloudbrain) IsTerminal() bool {
	// A lookup table is used rather than a switch: the compared values are
	// compile-time strings and some of the constants may share a value.
	finishedStates := [...]string{
		string(ModelArtsTrainJobCompleted),
		string(ModelArtsTrainJobFailed),
		string(ModelArtsTrainJobKilled),
		string(ModelArtsStopped),
		string(JobStopped),
		string(JobFailed),
		string(JobSucceeded),
	}
	for _, state := range finishedStates {
		if task.Status == state {
			return true
		}
	}
	return false
}

func ConvertDurationToStr(duration int64) string {
if duration == 0 {
return DURATION_STR_ZERO
@@ -201,6 +208,19 @@ func IsCloudBrainOneDebugJobTerminal(status string) bool {
return status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded)
}

// ParseAndSetDurationFromCloudBrainOne backfills the task's StartTime and
// EndTime from the job status in result and then recomputes the duration.
//
// Nothing is backfilled unless result.JobStatus.CreatedTime is positive.
// StartTime is only written while it is still zero. EndTime is only written
// while it is still zero, the task status is terminal according to
// IsCloudBrainOneDebugJobTerminal, and CompletedTime is positive.
// ComputeAndSetDuration is always called, whether or not anything changed.
//
// NOTE(review): both timestamps are divided by 1000 before being stored,
// presumably converting milliseconds to seconds; confirm against the API.
func ParseAndSetDurationFromCloudBrainOne(result JobResultPayload, task *Cloudbrain) {
	createdAt := result.JobStatus.CreatedTime
	completedAt := result.JobStatus.CompletedTime

	if createdAt > 0 {
		if task.StartTime == 0 {
			task.StartTime = timeutil.TimeStamp(createdAt / 1000)
		}
		if task.EndTime == 0 && completedAt > 0 && IsCloudBrainOneDebugJobTerminal(task.Status) {
			task.EndTime = timeutil.TimeStamp(completedAt / 1000)
		}
	}

	task.ComputeAndSetDuration()
}

type CloudbrainInfo struct {
Cloudbrain `xorm:"extends"`
User `xorm:"extends"`
@@ -368,7 +388,7 @@ type JobResultPayload struct {
AppProgress string `json:"appProgress"`
AppTrackingURL string `json:"appTrackingUrl"`
AppLaunchedTime int64 `json:"appLaunchedTime"`
AppCompletedTime interface{} `json:"appCompletedTime"`
AppCompletedTime int64 `json:"appCompletedTime"`
AppExitCode int `json:"appExitCode"`
AppExitDiagnostics string `json:"appExitDiagnostics"`
AppExitType interface{} `json:"appExitType"`


+ 42
- 25
public/home/search.js View File

@@ -124,29 +124,7 @@ function search(){
$('#searchForm').addClass("hiddenSearch");
initPageInfo();
if(!isEmpty(currentSearchKeyword)){
document.getElementById("find_id").innerHTML=getLabel(isZh,"search_finded");
currentSearchSortBy = sortBy[10];
currentSearchAscending = "false";
OnlySearchLabel =false;
page(currentPage);
if(currentSearchTableName != "repository"){
doSearch("repository",currentSearchKeyword,1,pageSize,true,"",false);
}
if(currentSearchTableName != "issue"){
doSearch("issue",currentSearchKeyword,1,pageSize,true,"",false);
}
if(currentSearchTableName != "user"){
doSearch("user",currentSearchKeyword,1,pageSize,true,"",false);
}
if(currentSearchTableName != "org"){
doSearch("org",currentSearchKeyword,1,pageSize,true,"",false);
}
if(currentSearchTableName != "dataset"){
doSearch("dataset",currentSearchKeyword,1,pageSize,true,"",false);
}
if(currentSearchTableName != "pr"){
doSearch("pr",currentSearchKeyword,1,pageSize,true,"",false);
}
doSpcifySearch(currentSearchTableName,currentSearchKeyword,sortBy[10],"false");
}else{
initDiv(false);
document.getElementById("find_id").innerHTML=getLabel(isZh,"search_empty");
@@ -187,6 +165,38 @@ function initDiv(isSearchLabel=false){
}
}

// Runs a search focused on one table: resets the result area, stores the
// keyword, table, sort field and direction in the page-level search state,
// renders the current page for that table, and then issues a first-page
// doSearch for every other table.
function doSpcifySearch(tableName,keyword,sortBy="",ascending="false"){
    initDiv(false);
    document.getElementById("find_id").innerHTML=getLabel(isZh,"search_finded");

    // The keyword is stored before initPageInfo() and the rest after it,
    // matching the original statement order.
    currentSearchKeyword = keyword;
    initPageInfo();
    currentSearchTableName = tableName;
    currentSearchSortBy = sortBy;
    currentSearchAscending = ascending;
    OnlySearchLabel =false;
    page(currentPage);

    // Every table except the focused one, in the original fixed order. The
    // globals are re-read on each pass, just as the separate if-blocks did.
    var allTables = ["repository","issue","user","org","dataset","pr"];
    for(var idx = 0; idx < allTables.length; idx++){
        if(currentSearchTableName != allTables[idx]){
            doSearch(allTables[idx],currentSearchKeyword,1,pageSize,true,"",false);
        }
    }
}

function doSearchLabel(tableName,keyword,sortBy="",ascending="false"){
initDiv(true);
//document.getElementById("search_div").style.display="none";
@@ -1272,8 +1282,15 @@ var zhCN={
sessionStorage.removeItem("searchLabel");
doSearchLabel(sessionStorage.getItem("tableName"),sessionStorage.getItem("keyword"),sessionStorage.getItem("sortBy"),sessionStorage.getItem("ascending"));
}else{
console.log("normal search....");
search();
var specifySearch = sessionStorage.getItem("specifySearch");
if(specifySearch){
sessionStorage.removeItem("specifySearch");
doSpcifySearch(sessionStorage.getItem("tableName"),sessionStorage.getItem("keyword"),sessionStorage.getItem("sortBy"),sessionStorage.getItem("ascending"));
}else{
console.log("normal search....");
search();
}
}
}
}


+ 3
- 12
routers/api/v1/repo/cloudbrain.go View File

@@ -17,7 +17,6 @@ import (
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
routerRepo "code.gitea.io/gitea/routers/repo"
)

@@ -74,24 +73,16 @@ func GetCloudbrainTask(ctx *context.APIContext) {
}

job.Status = result.JobStatus.State
taskRoles := result.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) {
taskRoles := result.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))

job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
job.ContainerID = taskRes.TaskStatuses[0].ContainerID
job.Status = taskRes.TaskStatuses[0].State

if job.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() {
job.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix())
}
}

if result.JobStatus.State != string(models.JobWaiting) {
if job.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(job.Status) {
job.EndTime = timeutil.TimeStampNow()
}
job.ComputeAndSetDuration()
models.ParseAndSetDurationFromCloudBrainOne(result, job)
err = models.UpdateJob(job)
if err != nil {
log.Error("UpdateJob failed:", err)


+ 27
- 34
routers/repo/cloudbrain.go View File

@@ -435,9 +435,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo
task.Status = taskRes.TaskStatuses[0].State
task.ContainerID = taskRes.TaskStatuses[0].ContainerID
task.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() {
task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix())
}
models.ParseAndSetDurationFromCloudBrainOne(jobRes, task)
err = models.UpdateJob(task)
if err != nil {
ctx.Data["error"] = err.Error()
@@ -1048,14 +1046,7 @@ func SyncCloudbrainStatus() {
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
task.Status = taskRes.TaskStatuses[0].State
if task.Status != string(models.JobWaiting) {
task.Duration = time.Now().Unix() - taskRes.TaskStatuses[0].StartAt.Unix()
if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() {
task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix())
}
if task.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.ComputeAndSetDuration()
models.ParseAndSetDurationFromCloudBrainOne(jobRes, task)
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
@@ -1111,7 +1102,7 @@ func SyncCloudbrainStatus() {
continue
}
}
} else if task.JobType == string(models.JobTypeTrain) {
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) {
result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
@@ -1152,6 +1143,7 @@ func SyncCloudbrainStatus() {
func HandleTaskWithNoDuration(ctx *context.Context) {
log.Info("HandleTaskWithNoDuration start")
count := 0
start := time.Now().Unix()
for {
cloudBrains, err := models.GetStoppedJobWithNoDurationJob()
if err != nil {
@@ -1168,13 +1160,19 @@ func HandleTaskWithNoDuration(ctx *context.Context) {
log.Info("HandleTaskWithNoTrainJobDuration:task less than 100")
break
}
if time.Now().Unix()-start > 600 {
log.Info("HandleTaskWithNoDuration : time out")
ctx.JSON(200, fmt.Sprintf("task stop for time out,count=%d", count))
return
}
}
log.Info("HandleTaskWithNoTrainJobDuration:count=%d", count)
ctx.JSON(200, "success")
ctx.JSON(200, fmt.Sprintf("success,count=%d", count))
}

func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
for _, task := range cloudBrains {
time.Sleep(time.Millisecond * 100)
log.Info("Handle job ,%+v", task)
if task.Type == models.TypeCloudBrainOne {
result, err := cloudbrain.GetJob(task.JobID)
@@ -1201,18 +1199,17 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
continue
}
task.Status = taskRes.TaskStatuses[0].State
startTime := taskRes.TaskStatuses[0].StartAt.Unix()
endTime := taskRes.TaskStatuses[0].FinishedAt.Unix()
log.Info("task startTime = %v endTime= %v ,jobId=%d", startTime, endTime, task.ID)
if startTime > 0 {
task.StartTime = timeutil.TimeStamp(startTime)
} else {
task.StartTime = task.CreatedUnix
}
if endTime > 0 {
task.EndTime = timeutil.TimeStamp(endTime)
log.Info("task startTime = %v endTime= %v ,jobId=%d", jobRes.JobStatus.StartTime, jobRes.JobStatus.EndTime, task.ID)
if jobRes.JobStatus.CreatedTime > 0 {
task.StartTime = timeutil.TimeStamp(jobRes.JobStatus.CreatedTime / 1000)
if jobRes.JobStatus.CompletedTime > 0 {
task.EndTime = timeutil.TimeStamp(jobRes.JobStatus.CompletedTime / 1000)
} else {
task.EndTime = task.UpdatedUnix
}
} else {
task.EndTime = task.UpdatedUnix
task.StartTime = 0
task.EndTime = 0
}

if task.EndTime < task.StartTime {
@@ -1221,7 +1218,8 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
task.StartTime = task.EndTime
task.EndTime = st
}
task.ComputeAndSetDuration()
task.Duration = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix()
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
@@ -1233,13 +1231,7 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
result, err := modelarts.GetNotebook2(task.JobID)
if err != nil {
log.Error("GetJob(%s) failed:%v", task.JobName, err)
task.StartTime = task.CreatedUnix
task.EndTime = task.UpdatedUnix
task.ComputeAndSetDuration()
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
}
updateDefaultDuration(task)
continue
}

@@ -1248,7 +1240,7 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
startTime := result.Lease.CreateTime
duration := result.Lease.Duration / 1000
if startTime > 0 {
task.StartTime = timeutil.TimeStamp(startTime)
task.StartTime = timeutil.TimeStamp(startTime / 1000)
task.EndTime = task.StartTime.Add(duration)
}
task.ComputeAndSetDuration()
@@ -1258,10 +1250,11 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
continue
}
}
} else if task.JobType == string(models.JobTypeTrain) {
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) {
result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
updateDefaultDuration(task)
continue
}



+ 13
- 1
templates/explore/repos.tmpl View File

@@ -10,7 +10,19 @@

<div class="ui sixteen wide mobile twelve wide tablet ten wide computer column">
{{template "explore/repo_list" .}}
{{template "base/paginate" .}}
<div id="app" style="margin-top: 2rem;">
<div class="center">
<el-pagination
background
@current-change="handleCurrentChange"
:current-page="page"
:page-sizes="[20]"
:page-size="20"
layout="total, sizes, prev, pager, next, jumper"
:total="{{.Page.Paginater.Total}}">
</el-pagination>
</div>
</div>
</div>
<div class="computer only ui three wide computer column">
{{template "explore/repo_right" .}}


+ 13
- 1
templates/repo/cloudbrain/benchmark/index.tmpl View File

@@ -185,7 +185,19 @@
</div>
-->

{{template "base/paginate" .}}
<div id="app" style="margin-top: 2rem;">
<div class="center">
<el-pagination
background
@current-change="handleCurrentChange"
:current-page="page"
:page-sizes="[10]"
:page-size="10"
layout="total, sizes, prev, pager, next, jumper"
:total="{{.Page.Paginater.Total}}">
</el-pagination>
</div>
</div>
</div>

</div>


+ 14
- 2
templates/repo/cloudbrain/benchmark/show.tmpl View File

@@ -196,7 +196,13 @@ td, th {
<span class="accordion-panel-title-content">
<span>
<div class="ac-display-inblock title_text acc-margin-bottom">
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span class="cti-mgRight-sm">
{{if not (eq .StartTime 0)}}
<td>{{TimeSinceUnix1 .StartTime}}</td>
{{else}}
<td>{{TimeSinceUnix1 .CreatedUnix}}</td>
{{end}}
</span>

<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
@@ -252,7 +258,13 @@ td, th {

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" class="">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span style="font-size: 12px;" class="">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
{{TimeSinceUnix1 .CreatedUnix}}
{{end}}
</span>
</div>
</td>
</tr>


+ 10
- 2
templates/repo/cloudbrain/show.tmpl View File

@@ -74,11 +74,19 @@
</tr>
<tr>
<td> 开始时间 </td>
<td>{{.JobStatus.StartTime}}</td>
{{if not (eq $.task.StartTime 0)}}
<td>{{TimeSinceUnix1 $.task.StartTime}}</td>
{{else}}
<td>无</td>
{{end}}
</tr>
<tr>
<td> 结束时间 </td>
<td>{{.JobStatus.EndTime}}</td>
{{if not (eq $.task.EndTime 0)}}
<td>{{TimeSinceUnix1 $.task.EndTime}}</td>
{{else}}
<td>无</td>
{{end}}
</tr>
<tr>
<td> ExitCode </td>


+ 1
- 1
templates/repo/cloudbrain/trainjob/new.tmpl View File

@@ -176,7 +176,7 @@
<span>
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
</span>
<a href="https://git.openi.org.cn/OpenIOSSG/MINIST_Example" target="_blank">查看样例</a>
<a href="https://git.openi.org.cn/OpenIOSSG/MNIST_PytorchExample_GPU" target="_blank">查看样例</a>
</div>

<div class="required unite min_title inline field" style="position: relative;">


+ 1
- 1
templates/repo/cloudbrain/trainjob/show.tmpl View File

@@ -214,7 +214,7 @@ td, th {
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<!--<a class="item" data-tab="second{{$k}}" onclick="loadLog({{.VersionName}})">{{$.i18n.Tr "repo.modelarts.log"}}</a>-->
<a class="item" data-tab="second{{$k}}" onclick="loadLog({{.VersionName}})">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item" data-tab="third{{$k}}" onclick="loadModelFile({{.VersionName}},'','','init')">{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">


+ 14
- 1
templates/repo/debugjob/index.tmpl View File

@@ -443,7 +443,20 @@
</div>
</div>
</div>
{{end}} {{template "base/paginate" .}}
{{end}}
<div id="app" style="margin-top: 2rem;">
<div class="center">
<el-pagination
background
@current-change="handleCurrentChange"
:current-page="page"
:page-sizes="[10]"
:page-size="10"
layout="total, sizes, prev, pager, next, jumper"
:total="{{.Page.Paginater.Total}}">
</el-pagination>
</div>
</div>
</div>
</div>
</div>


+ 14
- 1
templates/repo/modelarts/inferencejob/index.tmpl View File

@@ -186,7 +186,20 @@
</div>
</div>
</div>
{{end}} {{template "base/paginate" .}}
{{end}}
<div id="app" style="margin-top: 2rem;">
<div class="center">
<el-pagination
background
@current-change="handleCurrentChange"
:current-page="page"
:page-sizes="[10]"
:page-size="10"
layout="total, sizes, prev, pager, next, jumper"
:total="{{.Page.Paginater.Total}}">
</el-pagination>
</div>
</div>
</div>

</div>


+ 7
- 1
templates/repo/modelarts/inferencejob/show.tmpl View File

@@ -232,7 +232,13 @@ td, th {

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" class="">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span style="font-size: 12px;" class="">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
{{TimeSinceUnix1 .CreatedUnix}}
{{end}}
</span>
</div>
</td>
</tr>


+ 14
- 1
templates/repo/modelarts/trainjob/index.tmpl View File

@@ -180,7 +180,20 @@
</div>
</div>
</div>
{{end}} {{template "base/paginate" .}}
{{end}}
<div id="app" style="margin-top: 2rem;">
<div class="center">
<el-pagination
background
@current-change="handleCurrentChange"
:current-page="page"
:page-sizes="[10]"
:page-size="10"
layout="total, sizes, prev, pager, next, jumper"
:total="{{.Page.Paginater.Total}}">
</el-pagination>
</div>
</div>
</div>

</div>


+ 12
- 2
templates/repo/modelarts/trainjob/show.tmpl View File

@@ -223,7 +223,12 @@ td, th {
</div>
<div class="ac-display-inblock title_text acc-margin-bottom">

<span class="cti-mgRight-sm">{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}</span>
<span class="cti-mgRight-sm">
{{if not (eq .Cloudbrain.StartTime 0)}}
{{TimeSinceUnix1 .Cloudbrain.StartTime}}
{{else}}
{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}
{{end}}</span>
<span class="cti-mgRight-sm"> {{$.i18n.Tr "repo.modelarts.current_version"}}:{{.VersionName}}</span>
<span class="cti-mgRight-sm"> {{$.i18n.Tr "repo.modelarts.parent_version"}}:{{.PreVersionName}}</span>
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
@@ -293,7 +298,12 @@ td, th {

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" class="">{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}</span>
<span style="font-size: 12px;" class="">
{{if not (eq .Cloudbrain.StartTime 0)}}
{{TimeSinceUnix1 .Cloudbrain.StartTime}}
{{else}}
{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}
{{end}}</span>
</div>
</td>
</tr>


Loading…
Cancel
Save