Browse Source

Merge branch 'zouap' of https://git.openi.org.cn/OpenI/aiforge into zouap

pull/1036/head
zhoupzh 3 years ago
parent
commit
cf378a5af5
9 changed files with 161 additions and 6 deletions
  1. +1
    -0
      models/ai_model_manage.go
  2. +36
    -0
      models/cloudbrain.go
  3. +22
    -0
      modules/auth/auth.go
  4. +13
    -0
      modules/cron/tasks_basic.go
  5. +3
    -0
      modules/storage/storage.go
  6. +3
    -2
      routers/repo/ai_model_manage.go
  7. +75
    -0
      routers/repo/cloudbrain.go
  8. +6
    -2
      routers/repo/modelarts.go
  9. +2
    -2
      templates/repo/modelarts/trainjob/new.tmpl

+ 1
- 0
models/ai_model_manage.go View File

@@ -30,6 +30,7 @@ type AiModelManage struct {
CodeBranch string `xorm:"varchar(400) NULL"`
CodeCommitID string `xorm:"NULL"`
UserId int64 `xorm:"NOT NULL"`
UserName string `xorm:"NULL"`
TrainTaskInfo string `xorm:"text NULL"`
CreatedUnix timeutil.TimeStamp `xorm:"created"`
UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`


+ 36
- 0
models/cloudbrain.go View File

@@ -31,6 +31,7 @@ const (
JobTypeBrainScore JobType = "BRAINSCORE"
JobTypeTrain JobType = "TRAIN"

//notebook
ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" //免费资源创建排队中
ModelArtsCreating ModelArtsJobStatus = "CREATING" //创建中
ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED" //创建失败
@@ -46,6 +47,30 @@ const (
ModelArtsDeleted ModelArtsJobStatus = "DELETED" //已删除
ModelArtsResizing ModelArtsJobStatus = "RESIZING" //规格变更中
ModelArtsResizFailed ModelArtsJobStatus = "RESIZE_FAILED" //规格变更失败

//trainjob
ModelArtsTrainJobUnknown ModelArtsJobStatus = "UNKNOWN" //作业状态未知
ModelArtsTrainJobInit ModelArtsJobStatus = "INIT" //作业初始化状态
ModelArtsTrainJobImageCreating ModelArtsJobStatus = "IMAGE_CREATING" //作业镜像正在创建
ModelArtsTrainJobImageFailed ModelArtsJobStatus = "IMAGE_FAILED" //作业镜像创建失败
ModelArtsTrainJobSubmitTrying ModelArtsJobStatus = "SUBMIT_TRYING" //作业正在提交
ModelArtsTrainJobSubmitFailed ModelArtsJobStatus = "SUBMIT_FAILED" //作业提交失败
ModelArtsTrainJobDeleteFailed ModelArtsJobStatus = "DELETE_FAILED" //作业删除失败
ModelArtsTrainJobWaiting ModelArtsJobStatus = "WAITING" //作业正在排队中
ModelArtsTrainJobRunning ModelArtsJobStatus = "RUNNING" //作业正在运行中
ModelArtsTrainJobKilling ModelArtsJobStatus = "KILLING" //作业正在取消
ModelArtsTrainJobCompleted ModelArtsJobStatus = "COMPLETED" //作业已经完成
ModelArtsTrainJobFailed ModelArtsJobStatus = "FAILED" //作业运行失败
ModelArtsTrainJobKilled ModelArtsJobStatus = "KILLED" //作业取消成功
ModelArtsTrainJobCanceled ModelArtsJobStatus = "CANCELED" //作业取消
ModelArtsTrainJobLost ModelArtsJobStatus = "LOST" //作业丢失
ModelArtsTrainJobScaling ModelArtsJobStatus = "SCALING" //作业正在扩容
ModelArtsTrainJobSubmitModelFailed ModelArtsJobStatus = "SUBMIT_MODEL_FAILED" //提交模型失败
ModelArtsTrainJobDeployServiceFailed ModelArtsJobStatus = "DEPLOY_SERVICE_FAILED" //部署服务失败
ModelArtsTrainJobCheckInit ModelArtsJobStatus = "CHECK_INIT" //审核作业初始化
ModelArtsTrainJobCheckRunning ModelArtsJobStatus = "CHECK_RUNNING" //审核作业正在运行中
ModelArtsTrainJobCheckRunningCompleted ModelArtsJobStatus = "CHECK_RUNNING_COMPLETED" //审核作业已经完成
ModelArtsTrainJobCheckFailed ModelArtsJobStatus = "CHECK_FAILED" //审核作业失败
)

type Cloudbrain struct {
@@ -1133,3 +1158,14 @@ func CanDelJob(isSigned bool, user *User, job *CloudbrainInfo) bool {
}
return false
}

// GetCloudBrainUnStoppedJob returns cloudbrain tasks whose stored status is
// not one of the terminal states listed below (stopped, succeeded, failed,
// deleted, canceled, killed, lost, ...).
//
// At most 100 rows are returned per call. No ordering is applied here.
// NOTE(review): if more than 100 unfinished tasks exist, which ones come back
// depends on the database's default order — confirm this is acceptable.
//
// On query failure it returns a nil slice and the error.
func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) {
	cloudbrains := make([]*Cloudbrain, 0, 10)

	// Run the query as its own statement before returning. Writing
	// `return cloudbrains, x....Find(&cloudbrains)` relies on the slice
	// variable being read after Find has filled it, an ordering the Go
	// specification does not guarantee.
	err := x.
		NotIn("status",
			JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted,
			ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed,
			ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed).
		Limit(100).
		Find(&cloudbrains)
	if err != nil {
		return nil, err
	}
	return cloudbrains, nil
}

+ 22
- 0
modules/auth/auth.go View File

@@ -9,6 +9,9 @@ import (
"reflect"
"strings"

"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/setting"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth/sso"
"code.gitea.io/gitea/modules/validation"
@@ -31,6 +34,8 @@ func SignedInUser(ctx *macaron.Context, sess session.Store) (*models.User, bool)
return nil, false
}

checkAutoLogin(ctx, sess)

// Try to sign in with each of the enabled plugins
for _, ssoMethod := range sso.Methods() {
if !ssoMethod.IsEnabled() {
@@ -46,6 +51,23 @@ func SignedInUser(ctx *macaron.Context, sess session.Store) (*models.User, bool)
return nil, false
}

// checkAutoLogin restores the "uid" session value from the remember-me
// cookies when the session does not already carry one.
//
// It reads the user name from the setting.CookieUserName cookie, loads that
// user, and then validates the setting.CookieRememberName super-secure cookie
// using the MD5 of the user's Rands and Passwd as the secret. Only when that
// cookie decodes successfully and its value equals the user's name is
// "uid" written to the session. Any failure along the way leaves the session
// untouched.
func checkAutoLogin(ctx *macaron.Context, sess session.Store) {
	// A session that already knows its user needs no work.
	if sess.Get("uid") != nil {
		return
	}

	userName := ctx.GetCookie(setting.CookieUserName)
	user, err := models.GetUserByName(userName)
	if err != nil {
		return
	}

	secret := base.EncodeMD5(user.Rands + user.Passwd)
	remembered, ok := ctx.GetSuperSecureCookie(secret, setting.CookieRememberName)
	if ok && remembered == user.Name {
		sess.Set("uid", user.ID)
	}
}

// Form form binding interface
type Form interface {
binding.Validator


+ 13
- 0
modules/cron/tasks_basic.go View File

@@ -185,6 +185,17 @@ func registerHandleSummaryStatistic() {
})
}

func registerSyncCloudbrainStatus() {
RegisterTaskFatal("sync_cloudbrain_status", &BaseConfig{
Enabled: true,
RunAtStart: false,
Schedule: "@every 10m",
}, func(ctx context.Context, _ *models.User, _ Config) error {
repo.SyncCloudbrainStatus()
return nil
})
}

func initBasicTasks() {
registerUpdateMirrorTask()
registerRepoHealthCheck()
@@ -202,4 +213,6 @@ func initBasicTasks() {

registerHandleRepoAndUserStatistic()
registerHandleSummaryStatistic()

registerSyncCloudbrainStatus()
}

+ 3
- 0
modules/storage/storage.go View File

@@ -51,6 +51,7 @@ func Init() error {
switch setting.Attachment.StoreType {
case LocalStorageType:
Attachments, err = NewLocalStorage(setting.Attachment.Path)
log.Info("local storage inited.")
case MinioStorageType:
minio := setting.Attachment.Minio
Attachments, err = NewMinioStorage(
@@ -62,6 +63,7 @@ func Init() error {
minio.BasePath,
minio.UseSSL,
)
log.Info("minio storage inited.")
default:
return fmt.Errorf("Unsupported attachment store type: %s", setting.Attachment.StoreType)
}
@@ -71,6 +73,7 @@ func Init() error {
log.Error("obs.New failed:", err)
return err
}
log.Info("obs cli inited.")

if err != nil {
return err


+ 3
- 2
routers/repo/ai_model_manage.go View File

@@ -25,7 +25,7 @@ const (
MODEL_NOT_LATEST = 0
)

func saveModelByParameters(jobId string, versionName string, name string, version string, label string, description string, userId int64) error {
func saveModelByParameters(jobId string, versionName string, name string, version string, label string, description string, userId int64, userName string) error {
aiTask, err := models.GetCloudbrainByJobIDAndVersionName(jobId, versionName)
//aiTask, err := models.GetCloudbrainByJobID(jobId)
if err != nil {
@@ -82,6 +82,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio
AttachmentId: aiTask.Uuid,
RepoId: aiTask.RepoID,
UserId: userId,
UserName: userName,
CodeBranch: aiTask.BranchName,
CodeCommitID: aiTask.CommitID,
Engine: aiTask.EngineID,
@@ -122,7 +123,7 @@ func SaveModel(ctx *context.Context) {
return
}

err := saveModelByParameters(JobId, VersionName, name, version, label, description, ctx.User.ID)
err := saveModelByParameters(JobId, VersionName, name, version, label, description, ctx.User.ID, ctx.User.Name)

if err != nil {
log.Info("save model error." + err.Error())


+ 75
- 0
routers/repo/cloudbrain.go View File

@@ -715,3 +715,78 @@ func downloadRateCode(repo *models.Repository, taskName, gitPath, codePath, benc

return nil
}

// SyncCloudbrainStatus refreshes the stored status of every task returned by
// models.GetCloudBrainUnStoppedJob by querying the backend that owns it:
//
//   - TypeCloudBrainOne: cloudbrain.GetJob; the state of the first task status
//     of the cloudbrain.SubTaskName role is stored, unless it is still
//     JobWaiting, in which case nothing is written.
//   - TypeCloudBrainTwo, debug job: modelarts.GetJob; result.Status is stored.
//   - TypeCloudBrainTwo, train job: modelarts.GetTrainJob; the translated
//     status plus Duration and TrainJobDuration are stored.
//
// A failure on one task is logged and that task is skipped; the remaining
// tasks are still processed. Malformed backend payloads (conversion errors,
// a missing sub task role, an empty status list) are logged and skipped
// instead of panicking.
func SyncCloudbrainStatus() {
	cloudBrains, err := models.GetCloudBrainUnStoppedJob()
	if err != nil {
		log.Error("GetCloudBrainUnStoppedJob failed:%v", err)
		return
	}

	for _, task := range cloudBrains {
		switch task.Type {
		case models.TypeCloudBrainOne:
			result, err := cloudbrain.GetJob(task.JobID)
			if err != nil {
				log.Error("GetJob(%s) failed:%v", task.JobName, err)
				continue
			}
			if result == nil {
				continue
			}

			jobRes, err := models.ConvertToJobResultPayload(result.Payload)
			if err != nil {
				log.Error("ConvertToJobResultPayload(%s) failed:%v", task.JobName, err)
				continue
			}

			// The role entry is an untyped value; verify its shape before use.
			subTask, ok := jobRes.TaskRoles[cloudbrain.SubTaskName].(map[string]interface{})
			if !ok {
				log.Error("task role %s of job(%s) is missing or malformed", cloudbrain.SubTaskName, task.JobName)
				continue
			}

			taskRes, err := models.ConvertToTaskPod(subTask)
			if err != nil {
				log.Error("ConvertToTaskPod(%s) failed:%v", task.JobName, err)
				continue
			}
			if len(taskRes.TaskStatuses) == 0 {
				log.Error("job(%s) has no task statuses", task.JobName)
				continue
			}

			task.Status = taskRes.TaskStatuses[0].State
			// A job that is still waiting is not written back.
			if task.Status == string(models.JobWaiting) {
				continue
			}
			if err = models.UpdateJob(task); err != nil {
				log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
			}

		case models.TypeCloudBrainTwo:
			switch task.JobType {
			case string(models.JobTypeDebug):
				result, err := modelarts.GetJob(task.JobID)
				if err != nil {
					log.Error("GetJob(%s) failed:%v", task.JobName, err)
					continue
				}
				if result == nil {
					continue
				}

				task.Status = result.Status
				if err = models.UpdateJob(task); err != nil {
					log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
				}

			case string(models.JobTypeTrain):
				result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
				if err != nil {
					log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
					continue
				}
				if result == nil {
					continue
				}

				task.Status = modelarts.TransTrainJobStatus(result.IntStatus)
				task.Duration = result.Duration
				task.TrainJobDuration = result.TrainJobDuration
				if err = models.UpdateJob(task); err != nil {
					log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
				}

			default:
				log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType)
			}

		default:
			log.Error("task.Type(%s) is error:%d", task.JobName, task.Type)
		}
	}
}

+ 6
- 2
routers/repo/modelarts.go View File

@@ -364,13 +364,17 @@ func trainJobNewDataPrepare(ctx *context.Context) error {

Branches, err := ctx.Repo.GitRepo.GetBranches()
if err != nil {
log.Error("GetBranches failed:%v", err)
ctx.ServerError("GetBranches error:", err)
return err
}
ctx.Data["Branches"] = Branches
if Branches != nil {
ctx.Data["Branches"] = Branches
}

ctx.Data["BranchesCount"] = len(Branches)
ctx.Data["params"] = ""
ctx.Data["BranchName"] = ctx.Repo.BranchName
ctx.Data["branchName"] = ctx.Repo.BranchName

configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
if err != nil {


+ 2
- 2
templates/repo/modelarts/trainjob/new.tmpl View File

@@ -103,9 +103,9 @@
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.BranchName}}">{{.BranchName}}</option>
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.BranchName }}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}


Loading…
Cancel
Save