From 5405546144f6d25eef64a7b0ffcb8d06910beec3 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Fri, 18 Nov 2022 11:10:18 +0800 Subject: [PATCH 001/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/action.go | 4 + models/cloudbrain.go | 39 ++++ models/task_config.go | 2 + modules/auth/grampus.go | 16 ++ modules/grampus/grampus.go | 96 ++++++++++ modules/grampus/resty.go | 67 +++++++ modules/modelarts/modelarts.go | 92 +--------- routers/api/v1/repo/images.go | 2 +- routers/repo/grampus.go | 265 ++++++++++++++++++++-------- routers/routes/routes.go | 21 +-- services/cloudbrain/cloudbrainTask/count.go | 10 ++ 11 files changed, 440 insertions(+), 174 deletions(-) diff --git a/models/action.go b/models/action.go index 4b6f1dbad..869acb762 100755 --- a/models/action.go +++ b/models/action.go @@ -65,6 +65,8 @@ const ( ActionCreateImage //36 ActionImageRecommend //37 ActionChangeUserAvatar //38 + ActionCreateGrampusNPUDebugTask //39 + ActionCreateGrampusGPUDebugTask //40 ) // Action represents user operation type and other information to @@ -375,6 +377,8 @@ func (a *Action) IsCloudbrainAction() bool { ActionCreateInferenceTask, ActionCreateBenchMarkTask, ActionCreateGPUTrainTask, + ActionCreateGrampusGPUDebugTask, + ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, ActionCreateGrampusGPUTrainTask: return true diff --git a/models/cloudbrain.go b/models/cloudbrain.go index cdd9698fe..a2a69316f 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1442,6 +1442,20 @@ type GrampusJobInfo struct { UserID string `json:"userId"` Tasks []GrampusTasks `json:"tasks"` } + +type GrampusNotebookInfo struct { + StartedAt int64 `json:"startedAt"` + RunSec int64 `json:"runSec"` + CompletedAt int64 `json:"completedAt"` + CreatedAt int64 `json:"createdAt"` + UpdatedAt int64 `json:"updatedAt"` + Desc string `json:"desc"` + JobID string `json:"id"` + Name string `json:"name"` + Status string `json:"status"` + UserID string `json:"userId"` + Tasks []GrampusNotebookTask `json:"tasks"` +} type Center struct { ID string `json:"id"` Name string `json:"name"` @@ -1518,6 +1532,11 @@ type GetGrampusJobResponse struct { JobInfo GrampusJobInfo `json:"otJob"` } +type GrampusNotebookResponse struct { + GrampusResult + JobInfo GrampusNotebookInfo `json:"otJob"` +} + type GrampusStopJobResponse struct { GrampusResult StoppedAt int64 `json:"stoppedAt"` @@ -1537,6 +1556,21 @@ type GrampusTasks struct { Code GrampusDataset `json:"code"` BootFile string `json:"bootFile"` } +type GrampusNotebookTask struct { + AutoStopDuration int `json:"autoStopDuration"` + Name string `json:"name"` + Capacity int `json:"capacity"` + CenterID []string `json:"centerID"` + CenterName []string `json:"centerName"` + Code GrampusDataset `json:"code"` + Datasets []GrampusDataset `json:"datasets"` + ImageId string `json:"imageId"` + ImageUrl string `json:"imageUrl"` + ResourceSpecId string `json:"resourceSpecId"` + Token string `json:"token"` + Url string `json:"url"` + Status string `json:"status"` +} type GrampusDataset struct { Name string `json:"name"` @@ -1550,6 +1584,11 @@ type CreateGrampusJobRequest struct { Tasks []GrampusTasks `json:"tasks"` } +type CreateGrampusNotebookRequest struct { + Name string `json:"name"` + Tasks []GrampusNotebookTask `json:"tasks"` +} + type GetTrainJobMetricStatisticResult struct { TrainJobResult Interval int `json:"interval"` //查询的时间间隔,单位为分钟 diff --git a/models/task_config.go b/models/task_config.go index 0d9d21187..f86032fc9 100644 --- a/models/task_config.go +++ b/models/task_config.go @@ -36,6 +36,8 @@ func GetTaskTypeFromAction(a ActionType) TaskType { ActionCreateInferenceTask, ActionCreateBenchMarkTask, ActionCreateGPUTrainTask, + ActionCreateGrampusGPUDebugTask, + ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, ActionCreateGrampusGPUTrainTask: return TaskCreateCloudbrainTask diff --git a/modules/auth/grampus.go b/modules/auth/grampus.go index 414a7c25d..a50613613 100755 --- a/modules/auth/grampus.go +++ b/modules/auth/grampus.go @@ -29,3 +29,19 @@ type CreateGrampusTrainJobForm struct { func (f *CreateGrampusTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { return validate(errs, ctx.Data, f, ctx.Locale) } + +type CreateGrampusNotebookForm struct { + Type int `form:"type"` + DisplayJobName string `form:"display_job_name" binding:"Required"` + Attachment string `form:"attachment"` + ImageID string `form:"image_id" binding:"Required"` + Description string `form:"description"` + BranchName string `form:"branch_name" binding:"Required"` + Image string `form:"image" binding:"Required"` + DatasetName string `form:"dataset_name"` + SpecId int64 `form:"spec_id" binding:"Required"` +} + +func (f *CreateGrampusNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { + return validate(errs, ctx.Data, f, ctx.Locale) +} diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index b6f62560a..c8fc381d8 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -28,6 +28,7 @@ const ( BucketRemote = "grampus" RemoteModelPath = "/output/" + models.ModelSuffix + autoStopDurationMs = 4 * 60 * 60 * 1000 ) var ( @@ -81,6 +82,25 @@ type GenerateTrainJobReq struct { CodeName string } +type GenerateNotebookJobReq struct { + JobName string + Command string + ImageUrl string + ImageId string + DisplayJobName string + Uuid string + Description string + CodeObsPath string + CommitID string + BranchName string + ComputeResource string + ProcessType string + DatasetNames string + DatasetInfos map[string]models.DatasetInfo + Spec *models.Specification + CodeName string +} + func getEndPoint() string { index := strings.Index(setting.Endpoint, "//") endpoint := setting.Endpoint[index+2:] @@ -102,6 +122,82 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram return datasetGrampus } +func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) { + createTime := timeutil.TimeStampNow() + + var datasetGrampus []models.GrampusDataset + var codeGrampus models.GrampusDataset + if ProcessorTypeNPU == req.ProcessType { + datasetGrampus = getDatasetGrampus(req.DatasetInfos) + codeGrampus = models.GrampusDataset{ + Name: req.CodeName, + Bucket: setting.Bucket, + EndPoint: getEndPoint(), + ObjectKey: req.CodeObsPath + cloudbrain.DefaultBranchName + ".zip", + } + } + + jobResult, err := createNotebookJob(models.CreateGrampusNotebookRequest{ + Name: req.JobName, + Tasks: []models.GrampusNotebookTask{ + { + Name: req.JobName, + ResourceSpecId: req.Spec.SourceSpecId, + ImageId: req.ImageId, + ImageUrl: req.ImageUrl, + Datasets: datasetGrampus, + Code: codeGrampus, + AutoStopDuration:autoStopDurationMs, + Capacity: setting.Capacity, + }, + }, + }) + if err != nil { + log.Error("createNotebookJob failed: %v", err.Error()) + return "", err + } + + jobID := jobResult.JobInfo.JobID + err = models.CreateCloudbrain(&models.Cloudbrain{ + Status: TransTrainJobStatus(jobResult.JobInfo.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: jobID, + JobName: req.JobName, + DisplayJobName: req.DisplayJobName, + JobType: string(models.JobTypeDebug), + Type: models.TypeC2Net, + Uuid: req.Uuid, + DatasetName: req.DatasetNames, + CommitID: req.CommitID, + IsLatestVersion: "1", + ComputeResource: req.ComputeResource, + ImageID: req.ImageId, + BranchName: req.BranchName, + Description: req.Description, + WorkServerNumber: 1, + EngineName: req.ImageUrl, + CreatedUnix: createTime, + UpdatedUnix: createTime, + Spec: req.Spec, + }) + + if err != nil { + log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error()) + return "", err + } + + var actionType models.ActionType + if req.ComputeResource == models.NPUResource { + actionType = models.ActionCreateGrampusNPUDebugTask + } else if req.ComputeResource == models.GPUResource { + actionType = models.ActionCreateGrampusGPUDebugTask + } + notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType) + + return jobID, nil +} + func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() diff --git a/modules/grampus/resty.go b/modules/grampus/resty.go index a9e1aed5c..a5d55a71f 100755 --- a/modules/grampus/resty.go +++ b/modules/grampus/resty.go @@ -26,6 +26,7 @@ const ( urlGetResourceSpecs = urlOpenApiV1 + "resourcespec" urlGetAiCenter = urlOpenApiV1 + "sharescreen/aicenter" urlGetImages = urlOpenApiV1 + "image" + urlNotebookJob = urlOpenApiV1 + "notebook" errorIllegalToken = 1005 ) @@ -87,6 +88,39 @@ func getToken() error { return nil } +func createNotebookJob(req models.CreateGrampusNotebookRequest) (*models.GrampusNotebookResponse, error) { + checkSetting() + client := getRestyClient() + var result models.GrampusNotebookResponse + + retry := 0 + +sendjob: + _, err := client.R(). + SetHeader("Content-Type", "application/json"). + SetAuthToken(TOKEN). + SetBody(req). + SetResult(&result). + Post(HOST + urlNotebookJob) + + if err != nil { + return nil, fmt.Errorf("resty CreateNotebookJob: %s", err) + } + + if result.ErrorCode == errorIllegalToken && retry < 1 { + retry++ + _ = getToken() + goto sendjob + } + + if result.ErrorCode != 0 { + log.Error("CreateNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) + return &result, fmt.Errorf("CreateNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) + } + + return &result, nil +} + func createJob(req models.CreateGrampusJobRequest) (*models.CreateGrampusJobResponse, error) { checkSetting() client := getRestyClient() @@ -120,6 +154,39 @@ sendjob: return &result, nil } + +func GetNotebookJob(jobID string)(*models.GrampusNotebookResponse, error){ + checkSetting() + client := getRestyClient() + var result models.GrampusNotebookResponse + + retry := 0 + +sendjob: + _, err := client.R(). + SetAuthToken(TOKEN). + SetResult(&result). + Get(HOST + urlNotebookJob + "/" + jobID) + + if err != nil { + return nil, fmt.Errorf("resty GetNotebookJob: %v", err) + } + + if result.ErrorCode == errorIllegalToken && retry < 1 { + retry++ + log.Info("retry get token") + _ = getToken() + goto sendjob + } + + if result.ErrorCode != 0 { + log.Error("GetNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) + return nil, fmt.Errorf("GetNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) + } + + return &result, nil +} + func GetJob(jobID string) (*models.GetGrampusJobResponse, error) { checkSetting() client := getRestyClient() diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index dd502dfd0..c9d9bab69 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -4,7 +4,6 @@ import ( "encoding/json" "errors" "fmt" - "path" "strconv" "strings" @@ -15,20 +14,13 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/timeutil" ) const ( //notebook - storageTypeOBS = "obs" - autoStopDuration = 4 * 60 * 60 autoStopDurationMs = 4 * 60 * 60 * 1000 - MORDELART_USER_IMAGE_ENGINE_ID = -1 - DataSetMountPath = "/home/ma-user/work" - NotebookEnv = "Python3" - NotebookType = "Ascend" - FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)" + //train-job // ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}" @@ -185,14 +177,6 @@ type OrgMultiNode struct { Node []int `json:"node"` } -// type Parameter struct { -// Label string `json:"label"` -// Value string `json:"value"` -// } - -// type Parameters struct { -// Parameter []Parameter `json:"parameter"` -// } type Parameters struct { Parameter []struct { @@ -201,80 +185,6 @@ type Parameters struct { } `json:"parameter"` } -func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor string) error { - var dataActualPath string - if uuid != "" { - dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" - } else { - userPath := setting.UserBasePath + ctx.User.Name + "/" - isExist, err := storage.ObsHasObject(userPath) - if err != nil { - log.Error("ObsHasObject failed:%v", err.Error(), ctx.Data["MsgID"]) - return err - } - - if !isExist { - if err = storage.ObsCreateObject(userPath); err != nil { - log.Error("ObsCreateObject failed:%v", err.Error(), ctx.Data["MsgID"]) - return err - } - } - - dataActualPath = setting.Bucket + "/" + userPath - } - - if poolInfos == nil { - json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) - } - createTime := timeutil.TimeStampNow() - jobResult, err := CreateJob(models.CreateNotebookParams{ - JobName: jobName, - Description: description, - ProfileID: setting.ProfileID, - Flavor: flavor, - Pool: models.Pool{ - ID: poolInfos.PoolInfo[0].PoolId, - Name: poolInfos.PoolInfo[0].PoolName, - Type: poolInfos.PoolInfo[0].PoolType, - }, - Spec: models.Spec{ - Storage: models.Storage{ - Type: storageTypeOBS, - Location: models.Location{ - Path: dataActualPath, - }, - }, - AutoStop: models.AutoStop{ - Enable: true, - Duration: autoStopDuration, - }, - }, - }) - if err != nil { - log.Error("CreateJob failed: %v", err.Error()) - return err - } - err = models.CreateCloudbrain(&models.Cloudbrain{ - - Status: string(models.JobWaiting), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: jobResult.ID, - JobName: jobName, - JobType: string(models.JobTypeDebug), - Type: models.TypeCloudBrainTwo, - Uuid: uuid, - ComputeResource: models.NPUResource, - CreatedUnix: createTime, - UpdatedUnix: createTime, - }) - - if err != nil { - return err - } - notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobResult.ID, jobName, models.ActionCreateDebugNPUTask) - return nil -} func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error { if poolInfos == nil { diff --git a/routers/api/v1/repo/images.go b/routers/api/v1/repo/images.go index f0cb62980..e09ca260a 100644 --- a/routers/api/v1/repo/images.go +++ b/routers/api/v1/repo/images.go @@ -88,7 +88,7 @@ func getModelArtsImages(ctx *context.APIContext) { } func getC2netNpuImages(ctx *context.APIContext) { - images, err := grampus.GetImages(grampus.ProcessorTypeNPU) + images, err := grampus.GetImages(grampus.ProcessorTypeNPU, string(models.JobTypeTrain)) var npuImageInfos []NPUImageINFO if err != nil { log.Error("GetImages failed:", err.Error()) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index de7bb454d..4942e1df2 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -47,12 +47,34 @@ const ( tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show" //GPU + tplGrampusNotebookGPUNew base.TplName = "repo/grampus/notebook/gpu/new" tplGrampusTrainJobGPUNew base.TplName = "repo/grampus/trainjob/gpu/new" //NPU + tplGrampusNotebookNPUNew base.TplName = "repo/grampus/notebook/npu/new" tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new" ) +func GrampusNotebookNew(ctx *context.Context) { + ctx.Data["IsCreate"] = true + notebookType := ctx.QueryInt("type") + processType := grampus.ProcessorTypeGPU + if notebookType == 1 { + processType = grampus.ProcessorTypeNPU + } + err := grampusNotebookNewDataPrepare(ctx, processType) + if err != nil { + ctx.ServerError("get new notebook-job info failed", err) + return + } + if processType == grampus.ProcessorTypeGPU { + ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew) + } else { + ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew) + } + +} + func GrampusTrainJobGPUNew(ctx *context.Context) { ctx.Data["IsCreate"] = true err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) @@ -73,8 +95,125 @@ func GrampusTrainJobNPUNew(ctx *context.Context) { } ctx.HTML(200, tplGrampusTrainJobNPUNew) } +func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebookForm) { + ctx.Data["IsCreate"] = true + displayJobName := form.DisplayJobName + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + uuid := form.Attachment + description := form.Description + repo := ctx.Repo.Repository + branchName := form.BranchName + image := strings.TrimSpace(form.Image) + tpl := tplGrampusNotebookGPUNew + processType := grampus.ProcessorTypeGPU + computeSource := models.GPUResource + computeSourceSimple := models.GPU + if form.Type == 1 { + tpl = tplGrampusNotebookNPUNew + processType = grampus.ProcessorTypeNPU + computeSource = models.NPUResource + computeSourceSimple := models.NPU + } + + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) + defer lock.UnLock() + isOk, err := lock.Lock(models.CloudbrainKeyDuration) + if !isOk { + log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) + return + } -func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error { + if !jobNamePattern.MatchString(displayJobName) { + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) + return + } + + //check count limit + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeSource) + if err != nil { + log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, processType) + ctx.RenderWithErr("system error", tpl, &form) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, processType) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) + return + } + } + + //check whether the task name in the project is duplicated + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName) + if err == nil { + if len(tasks) != 0 { + log.Error("the job name did already exist", ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, processType) + ctx.RenderWithErr("the job name did already exist", tpl, &form) + return + } + } else { + if !models.IsErrJobNotExist(err) { + log.Error("system error, %v", err, ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, processType) + ctx.RenderWithErr("system error", tpl, &form) + return + } + } + + //check specification + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeDebug, + ComputeResource: computeSourceSimple, + Cluster: models.C2NetCluster, + }) + if err != nil || spec == nil { + grampusTrainJobNewDataPrepare(ctx, processType) + ctx.RenderWithErr("Resource specification not available", tpl, &form) + return + } + + if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { + log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) + grampusTrainJobNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) + return + } + + commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) + command := "" + + req := &grampus.GenerateNotebookReq{ + JobName: jobName, + DisplayJobName: displayJobName, + ComputeResource: computeSource, + ProcessType: processType, + Command: command, + ImageUrl: image, + ImageId: form.ImageID, + Description: description, + Uuid: uuid, + CommitID: commitID, + BranchName: branchName, + DatasetNames: form.DatasetName, + WorkServerNumber: 1, + Spec: spec, + } + + _, err = grampus.GenerateNotebook(ctx, req) + if err != nil { + log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, processType) + ctx.RenderWithErr(err.Error(), tpl, &form) + return + } + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") +} +func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) error { ctx.Data["PageIsCloudBrain"] = true t := time.Now() @@ -82,49 +221,67 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["display_job_name"] = displayJobName //get valid images - images, err := grampus.GetImages(processType) + if processType == grampus.ProcessorTypeNPU { + images, err := grampus.GetImages(processType, string(models.JobTypeDebug)) + if err != nil { + log.Error("GetImages failed:", err.Error()) + } else { + ctx.Data["images"] = images.Infos + } + } + //prepare available specs + computeResourceSimple := models.GPU + datasetType := models.TypeCloudBrainOne + computeResource := models.GPUResource + if processType == grampus.ProcessorTypeNPU { + computeResourceSimple = models.NPU + datasetType = models.TypeCloudBrainTwo + computeResource = models.NPUResource + } + + prepareGrampusSpecs(ctx, computeResourceSimple, models.JobTypeDebug) + + //get branches + branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) if err != nil { - log.Error("GetImages failed:", err.Error()) + log.Error("GetBranches error:", err.Error()) } else { - ctx.Data["images"] = images.Infos + ctx.Data["branches"] = branches } - grampus.InitSpecialPool() + ctx.Data["branchName"] = ctx.Repo.BranchName - ctx.Data["GPUEnabled"] = true - ctx.Data["NPUEnabled"] = true - includeCenters := make(map[string]struct{}) - excludeCenters := make(map[string]struct{}) - if grampus.SpecialPools != nil { - for _, pool := range grampus.SpecialPools.Pools { - if pool.IsExclusive { - if !IsUserInOrgPool(ctx.User.ID, pool) { - ctx.Data[pool.Type+"Enabled"] = false - } - } else { - if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) { - if IsUserInOrgPool(ctx.User.ID, pool) { - for _, center := range pool.Pool { - includeCenters[center.Queue] = struct{}{} - } - } else { - for _, center := range pool.Pool { - excludeCenters[center.Queue] = struct{}{} - } + ctx.Data["datasetType"] = datasetType + waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, computeResource, models.JobTypeDebug) + ctx.Data["WaitCount"] = waitCount + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount - } + return nil +} - } +func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error { + ctx.Data["PageIsCloudBrain"] = true - } + t := time.Now() + var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["display_job_name"] = displayJobName + + //get valid images + if processType == grampus.ProcessorTypeNPU { + images, err := grampus.GetImages(processType, string(models.JobTypeTrain)) + if err != nil { + log.Error("GetImages failed:", err.Error()) + } else { + ctx.Data["images"] = images.Infos } } //prepare available specs if processType == grampus.ProcessorTypeNPU { - prepareGrampusTrainSpecs(ctx, models.NPU) + prepareGrampusSpecs(ctx, models.NPU) } else if processType == grampus.ProcessorTypeGPU { - prepareGrampusTrainSpecs(ctx, models.GPU) + prepareGrampusSpecs(ctx, models.GPU) } //get branches @@ -203,55 +360,19 @@ func GrampusTrainJobVersionNew(ctx *context.Context) { } } -func prepareGrampusTrainSpecs(ctx *context.Context, computeResource string) { +func prepareGrampusSpecs(ctx *context.Context, computeResource string, jobType ...models.JobType) { + tempJobType := models.JobTypeTrain + if len(jobType) > 0 { + tempJobType = jobType[0] + } noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ - JobType: models.JobTypeTrain, + JobType: tempJobType, ComputeResource: computeResource, Cluster: models.C2NetCluster, }) ctx.Data["Specs"] = noteBookSpecs } -func getFilterSpecBySpecialPool(specs *models.GetGrampusResourceSpecsResult, includeCenters map[string]struct{}, excludeCenters map[string]struct{}) []models.GrampusSpec { - if len(includeCenters) == 0 && len(excludeCenters) == 0 { - return specs.Infos - } - var grampusSpecs []models.GrampusSpec - for _, info := range specs.Infos { - if isInIncludeCenters(info, includeCenters) || (len(excludeCenters) != 0 && isNotAllInExcludeCenters(info, excludeCenters)) { - grampusSpecs = append(grampusSpecs, info) - } - - } - return grampusSpecs -} - -func isInIncludeCenters(grampusSpec models.GrampusSpec, centers map[string]struct{}) bool { - for _, center := range grampusSpec.Centers { - if _, ok := centers[center.ID]; ok { - return true - } - } - return false -} -func isNotAllInExcludeCenters(grampusSpec models.GrampusSpec, centers map[string]struct{}) bool { - for _, center := range grampusSpec.Centers { - if _, ok := centers[center.ID]; !ok { - return true - } - } - return false -} - -func IsUserInOrgPool(userId int64, pool *models.SpecialPool) bool { - org, _ := models.GetOrgByName(pool.Org) - if org != nil { - isOrgMember, _ := models.IsOrganizationMember(org.ID, userId) - return isOrgMember - } - return false -} - func grampusParamCheckCreateTrainJob(form auth.CreateGrampusTrainJobForm) error { if !strings.HasSuffix(strings.TrimSpace(form.BootFile), ".py") { log.Error("the boot file(%s) must be a python file", form.BootFile) diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 2b361b507..450c2ac1b 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1216,6 +1216,17 @@ func RegisterRoutes(m *macaron.Macaron) { }) }, context.RepoRef()) m.Group("/grampus", func() { + m.Group("/notebook", func() { + m.Group("/:jobid", func() { + m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow) + m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusStopJob) + m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel) + }) + + m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusNotebookNew) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusNotebookForm{}), repo.GrampusNotebookCreate) + }) + m.Group("/train-job", func() { m.Group("/:jobid", func() { m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow) @@ -1288,16 +1299,6 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/modelarts", func() { m.Group("/notebook", func() { - /* v1.0 - m.Group("/:jobid", func() { - m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) - m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug) - m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage) - m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) - }) - m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) - m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.NotebookCreate) - */ m.Group("/:id", func() { m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2) diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index 985706911..4ae742c3a 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -62,6 +62,16 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s JobType: []models.JobType{models.JobTypeTrain}, NotFinalStatuses: GrampusNotFinalStatuses, ComputeResource: models.NPUResource, +}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: GrampusNotFinalStatuses, + ComputeResource: models.GPUResource, +}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: GrampusNotFinalStatuses, + ComputeResource: models.NPUResource, }} func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { From c5e6846c42aa7a770ea67ca6f99da3524cb48206 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Mon, 21 Nov 2022 15:40:25 +0800 Subject: [PATCH 002/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 37 ++- modules/grampus/grampus.go | 107 ++++--- modules/grampus/resty.go | 42 ++- routers/api/v1/api.go | 3 + routers/repo/grampus.go | 370 ++++++++++++++++++++-- routers/routes/routes.go | 10 +- services/cloudbrain/cloudbrainTask/sync_status.go | 50 +++ 7 files changed, 514 insertions(+), 105 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index a2a69316f..627c161f9 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1444,16 +1444,16 @@ type GrampusJobInfo struct { } type GrampusNotebookInfo struct { - StartedAt int64 `json:"startedAt"` - RunSec int64 `json:"runSec"` - CompletedAt int64 `json:"completedAt"` - CreatedAt int64 `json:"createdAt"` - UpdatedAt int64 `json:"updatedAt"` - Desc string `json:"desc"` - JobID string `json:"id"` - Name string `json:"name"` - Status string `json:"status"` - UserID string `json:"userId"` + StartedAt int64 `json:"startedAt"` + RunSec int64 `json:"runSec"` + CompletedAt int64 `json:"completedAt"` + CreatedAt int64 `json:"createdAt"` + UpdatedAt int64 `json:"updatedAt"` + Desc string `json:"desc"` + JobID string `json:"id"` + Name string `json:"name"` + Status string `json:"status"` + UserID string `json:"userId"` Tasks []GrampusNotebookTask `json:"tasks"` } type Center struct { @@ -1537,6 +1537,11 @@ type GrampusNotebookResponse struct { JobInfo GrampusNotebookInfo `json:"otJob"` } +type GrampusNotebookRestartResponse struct { + NewId string `json:"newId"` + Status string `json:"status"` +} + type GrampusStopJobResponse struct { GrampusResult StoppedAt int64 `json:"stoppedAt"` @@ -1558,18 +1563,20 @@ type GrampusTasks struct { } type GrampusNotebookTask struct { AutoStopDuration int `json:"autoStopDuration"` - Name string `json:"name"` + Name string `json:"name"` Capacity int `json:"capacity"` CenterID []string `json:"centerID"` CenterName []string `json:"centerName"` Code GrampusDataset `json:"code"` Datasets []GrampusDataset `json:"datasets"` + CodeUrl string `json:"codeUrl"` + DataUrl string `json:"dataUrl"` ImageId string `json:"imageId"` ImageUrl string `json:"imageUrl"` ResourceSpecId string `json:"resourceSpecId"` - Token string `json:"token"` - Url string `json:"url"` - Status string `json:"status"` + Token string `json:"token"` + Url string `json:"url"` + Status string `json:"status"` } type GrampusDataset struct { @@ -1585,7 +1592,7 @@ type CreateGrampusJobRequest struct { } type CreateGrampusNotebookRequest struct { - Name string `json:"name"` + Name string `json:"name"` Tasks []GrampusNotebookTask `json:"tasks"` } diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index c8fc381d8..26e85a1cf 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -26,9 +26,9 @@ const ( CodeArchiveName = "master.zip" - BucketRemote = "grampus" - RemoteModelPath = "/output/" + models.ModelSuffix - autoStopDurationMs = 4 * 60 * 60 * 1000 + BucketRemote = "grampus" + RemoteModelPath = "/output/" + models.ModelSuffix + autoStopDurationMs = 4 * 60 * 60 * 1000 ) var ( @@ -83,22 +83,22 @@ type GenerateTrainJobReq struct { } type GenerateNotebookJobReq struct { - JobName string - Command string - ImageUrl string - ImageId string - DisplayJobName string - Uuid string - Description string - CodeObsPath string - CommitID string - BranchName string - ComputeResource string - ProcessType string - DatasetNames string - DatasetInfos map[string]models.DatasetInfo - Spec *models.Specification - CodeName string + JobName string + Command string + ImageUrl string + ImageId string + DisplayJobName string + Uuid string + Description string + CodeStoragePath string + CommitID string + BranchName string + ComputeResource string + ProcessType string + DatasetNames string + DatasetInfos map[string]models.DatasetInfo + Spec *models.Specification + CodeName string } func getEndPoint() string { @@ -133,22 +133,31 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job Name: req.CodeName, Bucket: setting.Bucket, EndPoint: getEndPoint(), - ObjectKey: req.CodeObsPath + cloudbrain.DefaultBranchName + ".zip", + ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", } + } else { + + codeGrampus = models.GrampusDataset{ + Name: req.CodeName, + Bucket: setting.Bucket, + EndPoint: getEndPoint(), + ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", + } + } jobResult, err := createNotebookJob(models.CreateGrampusNotebookRequest{ Name: req.JobName, Tasks: []models.GrampusNotebookTask{ { - Name: req.JobName, - ResourceSpecId: req.Spec.SourceSpecId, - ImageId: req.ImageId, - ImageUrl: req.ImageUrl, - Datasets: datasetGrampus, - Code: codeGrampus, - AutoStopDuration:autoStopDurationMs, - Capacity: setting.Capacity, + Name: req.JobName, + ResourceSpecId: req.Spec.SourceSpecId, + ImageId: req.ImageId, + ImageUrl: req.ImageUrl, + Datasets: datasetGrampus, + Code: codeGrampus, + AutoStopDuration: autoStopDurationMs, + Capacity: setting.Capacity, }, }, }) @@ -159,27 +168,27 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job jobID := jobResult.JobInfo.JobID err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.JobInfo.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: jobID, - JobName: req.JobName, - DisplayJobName: req.DisplayJobName, - JobType: string(models.JobTypeDebug), - Type: models.TypeC2Net, - Uuid: req.Uuid, - DatasetName: req.DatasetNames, - CommitID: req.CommitID, - IsLatestVersion: "1", - ComputeResource: req.ComputeResource, - ImageID: req.ImageId, - BranchName: req.BranchName, - Description: req.Description, - WorkServerNumber: 1, - EngineName: req.ImageUrl, - CreatedUnix: createTime, - UpdatedUnix: createTime, - Spec: req.Spec, + Status: TransTrainJobStatus(jobResult.JobInfo.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: jobID, + JobName: req.JobName, + DisplayJobName: req.DisplayJobName, + JobType: string(models.JobTypeDebug), + Type: models.TypeC2Net, + Uuid: req.Uuid, + DatasetName: req.DatasetNames, + CommitID: req.CommitID, + IsLatestVersion: "1", + ComputeResource: req.ComputeResource, + ImageID: req.ImageId, + BranchName: req.BranchName, + Description: req.Description, + WorkServerNumber: 1, + EngineName: req.ImageUrl, + CreatedUnix: createTime, + UpdatedUnix: createTime, + Spec: req.Spec, }) if err != nil { diff --git a/modules/grampus/resty.go b/modules/grampus/resty.go index a5d55a71f..13e6866fc 100755 --- a/modules/grampus/resty.go +++ b/modules/grampus/resty.go @@ -26,7 +26,7 @@ const ( urlGetResourceSpecs = urlOpenApiV1 + "resourcespec" urlGetAiCenter = urlOpenApiV1 + "sharescreen/aicenter" urlGetImages = urlOpenApiV1 + "image" - urlNotebookJob = urlOpenApiV1 + "notebook" + urlNotebookJob = urlOpenApiV1 + "notebook" errorIllegalToken = 1005 ) @@ -154,8 +154,7 @@ sendjob: return &result, nil } - -func GetNotebookJob(jobID string)(*models.GrampusNotebookResponse, error){ +func GetNotebookJob(jobID string) (*models.GrampusNotebookResponse, error) { checkSetting() client := getRestyClient() var result models.GrampusNotebookResponse @@ -251,7 +250,7 @@ sendjob: return &result, nil } -func GetImages(processorType string) (*models.GetGrampusImagesResult, error) { +func GetImages(processorType string, jobType string) (*models.GetGrampusImagesResult, error) { checkSetting() client := getRestyClient() var result models.GetGrampusImagesResult @@ -262,7 +261,7 @@ sendjob: _, err := client.R(). SetAuthToken(TOKEN). SetResult(&result). - Get(HOST + urlGetImages + "?processorType=" + processorType) + Get(HOST + urlGetImages + "?processorType=" + processorType + "&jobType=" + jobType) if err != nil { return nil, fmt.Errorf("resty GetImages: %v", err) @@ -338,19 +337,26 @@ func GetGrampusMetrics(jobID string) (models.GetTrainJobMetricStatisticResult, e return result, nil } -func StopJob(jobID string) (*models.GrampusStopJobResponse, error) { +func StopJob(jobID string, jobType ...string) (*models.GrampusStopJobResponse, error) { checkSetting() client := getRestyClient() var result models.GrampusStopJobResponse retry := 0 + url := urlTrainJob + if len(jobType) > 0 { + if jobType[0] == string(models.JobTypeDebug) { + url = urlNotebookJob + } + } + sendjob: _, err := client.R(). //SetHeader("Content-Type", "application/json"). SetAuthToken(TOKEN). SetResult(&result). - Post(HOST + urlTrainJob + "/" + jobID + "/stop") + Post(HOST + url + "/" + jobID + "/stop") if err != nil { return &result, fmt.Errorf("resty StopTrainJob: %v", err) @@ -402,3 +408,25 @@ sendjob: return &result, nil } + +func RestartNotebookJob(jobID string) (*models.GrampusNotebookRestartResponse, error) { + checkSetting() + client := getRestyClient() + var restartResponse *models.GrampusNotebookRestartResponse + + res, err := client.R(). + SetAuthToken(TOKEN). + SetResult(&restartResponse). + Get(HOST + urlNotebookJob + "/" + jobID + "/start") + + if err != nil { + return nil, fmt.Errorf("resty grampus restart note book job: %v", err) + } + + if res.StatusCode() != http.StatusOK { + log.Error("resty grampus restart note book job failed(%s): %v", res.String(), err.Error()) + return nil, fmt.Errorf("resty grampus restart note book job failed: %v", err) + } + + return restartResponse, nil +} diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 2afbb9b7d..dda8b0e80 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -1046,6 +1046,9 @@ func RegisterRoutes(m *macaron.Macaron) { }) }, reqRepoReader(models.UnitTypeCloudBrain)) m.Group("/grampus", func() { + m.Group("/notebook", func() { + m.Get("/:id", repo_ext.GetGrampusNotebook) + }) m.Group("/train-job", func() { m.Group("/:jobid", func() { m.Get("", repo.GetModelArtsTrainJobVersion) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 4942e1df2..f2f26cf0d 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -45,6 +45,7 @@ import ( const ( tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show" + tplGrampusNotebookShow base.TplName = "repo/grampus/notebook/show" //GPU tplGrampusNotebookGPUNew base.TplName = "repo/grampus/notebook/gpu/new" @@ -104,6 +105,10 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook repo := ctx.Repo.Repository branchName := form.BranchName image := strings.TrimSpace(form.Image) + codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" + + codeStoragePath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" + tpl := tplGrampusNotebookGPUNew processType := grampus.ProcessorTypeGPU computeSource := models.GPUResource @@ -112,7 +117,8 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook tpl = tplGrampusNotebookNPUNew processType = grampus.ProcessorTypeNPU computeSource = models.NPUResource - computeSourceSimple := models.NPU + computeSourceSimple = models.NPU + codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath } lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) @@ -135,13 +141,13 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeSource) if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, processType) + grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, processType) + grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) return } @@ -152,14 +158,14 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, processType) + grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, processType) + grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr("system error", tpl, &form) return } @@ -172,41 +178,80 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook Cluster: models.C2NetCluster, }) if err != nil || spec == nil { - grampusTrainJobNewDataPrepare(ctx, processType) + grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr("Resource specification not available", tpl, &form) return } if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) - grampusTrainJobNewDataPrepare(ctx, processType) + grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) return } + datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, computeSourceSimple) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) + return + } + + //prepare code and out path + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } + + if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { + log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) + return + } + + if processType == grampus.ProcessorTypeGPU { + if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { + log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) + return + } + } else { + + if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) + return + } + } + commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) command := "" - req := &grampus.GenerateNotebookReq{ - JobName: jobName, - DisplayJobName: displayJobName, - ComputeResource: computeSource, - ProcessType: processType, - Command: command, - ImageUrl: image, - ImageId: form.ImageID, - Description: description, - Uuid: uuid, - CommitID: commitID, - BranchName: branchName, - DatasetNames: form.DatasetName, - WorkServerNumber: 1, - Spec: spec, + req := &grampus.GenerateNotebookJobReq{ + JobName: jobName, + DisplayJobName: displayJobName, + ComputeResource: computeSource, + ProcessType: processType, + Command: command, + ImageUrl: image, + ImageId: form.ImageID, + Description: description, + Uuid: uuid, + CommitID: commitID, + BranchName: branchName, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, + Spec: spec, + CodeStoragePath: codeStoragePath, } - _, err = grampus.GenerateNotebook(ctx, req) + _, err = grampus.GenerateNotebookJob(ctx, req) if err != nil { - log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) + log.Error("GenerateNotebookJob failed:%v", err.Error(), ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, processType) ctx.RenderWithErr(err.Error(), tpl, &form) return @@ -844,22 +889,54 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } +func GetGrampusNotebook(ctx *context.APIContext) { + var ( + err error + ) + + ID := ctx.Params(":id") + job, err := models.GetCloudbrainByID(ID) + if err != nil { + ctx.NotFound("", err) + log.Error("GetCloudbrainByID failed:", err) + return + } + + jobAfter, err := cloudbrainTask.SyncGrampusNotebookStatus(job) + + if err != nil { + ctx.NotFound(err) + log.Error("Sync cloud brain one status failed:", err) + return + } + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "ID": ID, + "JobName": jobAfter.JobName, + "JobStatus": jobAfter.Status, + "SubState": "", + "CreatedTime": jobAfter.CreatedUnix.Format("2006-01-02 15:04:05"), + "CompletedTime": jobAfter.UpdatedUnix.Format("2006-01-02 15:04:05"), + "JobDuration": jobAfter.TrainJobDuration, + }) +} + func GrampusStopJob(ctx *context.Context) { - var ID = ctx.Params(":jobid") + var ID = ctx.Params(":id") var resultCode = "0" var errorMsg = "" var status = "" task := ctx.Cloudbrain for { - if task.Status == string(models.GrampusStatusStopped) || task.Status == string(models.GrampusStatusFailed) || task.Status == string(models.GrampusStatusSucceeded) { + if task.Status == models.GrampusStatusStopped || task.Status == models.GrampusStatusFailed || task.Status == models.GrampusStatusSucceeded { log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"]) resultCode = "-1" - errorMsg = "system error" + errorMsg = "System error" break } - res, err := grampus.StopJob(task.JobID) + res, err := grampus.StopJob(task.JobID, task.JobType) if err != nil { log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) resultCode = strconv.Itoa(res.ErrorCode) @@ -896,6 +973,25 @@ func GrampusStopJob(ctx *context.Context) { }) } +func GrampusNotebookDel(ctx *context.Context) { + var listType = ctx.Query("listType") + if err := deleteGrampusJob(ctx); err != nil { + log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"]) + ctx.ServerError(err.Error(), err) + return + } + + var isAdminPage = ctx.Query("isadminpage") + var isHomePage = ctx.Query("ishomepage") + if ctx.IsUserSiteAdmin() && isAdminPage == "true" { + ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains") + } else if isHomePage == "true" { + ctx.Redirect(setting.AppSubURL + "/cloudbrains") + } else { + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=" + listType) + } +} + func GrampusTrainJobDel(ctx *context.Context) { var listType = ctx.Query("listType") if err := deleteGrampusJob(ctx); err != nil { @@ -918,9 +1014,9 @@ func GrampusTrainJobDel(ctx *context.Context) { func deleteGrampusJob(ctx *context.Context) error { task := ctx.Cloudbrain - if task.Status != string(models.GrampusStatusStopped) && task.Status != string(models.GrampusStatusSucceeded) && task.Status != string(models.GrampusStatusFailed) { + if task.Status != models.GrampusStatusStopped && task.Status != models.GrampusStatusSucceeded && task.Status != models.GrampusStatusFailed { log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"]) - return errors.New("the job has not been stopped") + return errors.New(ctx.Tr("cloudbrain.Not_Stopped")) } err := models.DeleteJob(task) @@ -938,6 +1034,89 @@ func deleteGrampusJob(ctx *context.Context) error { return nil } +func GrampusNotebookShow(ctx *context.Context) { + ctx.Data["PageIsCloudBrain"] = true + + var task *models.Cloudbrain + task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid")) + if err != nil { + log.Error("GetCloudbrainByJobID failed:" + err.Error()) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + return + } + task.ContainerIp = "" + + if task.DeletedAt.IsZero() { //normal record + result, err := grampus.GetNotebookJob(task.JobID) + if err != nil { + log.Error("GetJob failed:" + err.Error()) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + return + } + + if result != nil { + if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { + task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] + } + oldStatus := task.Status + task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) + if task.Status != oldStatus || task.Status == models.GrampusStatusRunning { + task.Duration = result.JobInfo.RunSec + if task.Duration < 0 { + task.Duration = 0 + } + task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) + + if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { + task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) + } + if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { + task.EndTime = task.StartTime.Add(task.Duration) + } + task.CorrectCreateUnix() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + if models.IsTrainJobTerminal(task.Status) && task.ComputeResource == models.NPUResource { + if len(result.JobInfo.Tasks[0].CenterID) == 1 { + urchin.GetBackNpuModel(task.ID, grampus.GetRemoteEndPoint(result.JobInfo.Tasks[0].CenterID[0]), grampus.BucketRemote, grampus.GetNpuModelObjectKey(task.JobName), grampus.GetCenterProxy(setting.Grampus.LocalCenterID)) + } + } + } + } + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob failed:" + err.Error()) + } + } + } + + if len(task.Parameters) > 0 { + var parameters models.Parameters + err := json.Unmarshal([]byte(task.Parameters), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) + ctx.ServerError("system error", err) + return + } + + if len(parameters.Parameter) > 0 { + paramTemp := "" + for _, Parameter := range parameters.Parameter { + param := Parameter.Label + " = " + Parameter.Value + "; " + paramTemp = paramTemp + param + } + task.Parameters = paramTemp[:len(paramTemp)-2] + } else { + task.Parameters = "" + } + } + prepareSpec4Show(ctx, task) + ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) + ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) + ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) + ctx.HTML(http.StatusOK, tplGrampusNotebookShow) +} + func GrampusTrainJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true @@ -1281,3 +1460,134 @@ func HandleTaskWithAiCenter(ctx *context.Context) { r["updateCounts"] = updateCounts ctx.JSON(http.StatusOK, response.SuccessWithData(r)) } + +func GrampusNotebookDebug(ctx *context.Context) { + + result, err := grampus.GetNotebookJob(ctx.Cloudbrain.JobID) + + if err != nil { + ctx.RenderWithErr(err.Error(), tplDebugJobIndex, nil) + return + } + if len(result.JobInfo.Tasks) > 0 { + ctx.Redirect(result.JobInfo.Tasks[0].Url + "?token=" + result.JobInfo.Tasks[0].Token) + return + } + ctx.NotFound("Can not find the job.", nil) + +} + +func GrampusNotebookRestart(ctx *context.Context) { + var id = ctx.Params(":id") + var resultCode = "-1" + var errorMsg = "" + var status = "" + var spec *models.Specification + + task := ctx.Cloudbrain + if ctx.Written() { + return + } + + for { + + if task.Status != models.GrampusStatusStopped && task.Status != models.GrampusStatusSucceeded && task.Status != models.GrampusStatusFailed { + log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) + errorMsg = "the job is not stopped" + break + } + + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), task.ComputeResource) + + if err != nil { + log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) + errorMsg = "system error" + break + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + resultCode = "2" + errorMsg = ctx.Tr("repo.cloudbrain.morethanonejob") + break + } + } + + oldSpec, err := resource.GetCloudbrainSpec(task.ID) + if err != nil || oldSpec == nil { + log.Error("NotebookManage GetCloudbrainSpec error.%v", err) + errorMsg = "Resource specification not available" + break + } + + computeSourceSimple := models.GPU + action := models.ActionCreateGrampusGPUDebugTask + if task.ComputeResource == models.NPUResource { + computeSourceSimple = models.NPU + action = models.ActionCreateGrampusNPUDebugTask + } + spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{ + JobType: models.JobType(task.JobType), + ComputeResource: computeSourceSimple, + Cluster: models.C2NetCluster, + }) + if err != nil || spec == nil { + log.Error("NotebookManage GetAndCheckSpec error.task.id = %d", task.ID) + errorMsg = "Resource specification not support any more" + break + } + if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { + log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) + errorMsg = ctx.Tr("points.insufficient_points_balance") + break + } + createTime := timeutil.TimeStampNow() + + res, err := grampus.RestartNotebookJob(task.JobID) + if err != nil { + log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"]) + errorMsg = err.Error() + break + } + + newTask := &models.Cloudbrain{ + Status: res.Status, + UserID: task.UserID, + RepoID: task.RepoID, + JobID: res.NewId, + JobName: task.JobName, + DisplayJobName: task.DisplayJobName, + JobType: task.JobType, + Type: task.Type, + Uuid: task.Uuid, + Image: task.Image, + ComputeResource: task.ComputeResource, + Description: task.Description, + CreatedUnix: createTime, + UpdatedUnix: createTime, + Spec: spec, + } + + err = models.RestartCloudbrain(task, newTask) + if err != nil { + log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) + errorMsg = "system error" + break + } + + id = strconv.FormatInt(newTask.ID, 10) + + status = res.Status + resultCode = "0" + + notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, id, newTask.DisplayJobName, action) + + break + } + + ctx.JSON(200, map[string]string{ + "result_code": resultCode, + "error_msg": errorMsg, + "status": status, + "id": id, + }) +} diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 450c2ac1b..c56ca46ee 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1217,10 +1217,12 @@ func RegisterRoutes(m *macaron.Macaron) { }, context.RepoRef()) m.Group("/grampus", func() { m.Group("/notebook", func() { - m.Group("/:jobid", func() { - m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow) + m.Group("/:id", func() { + m.Get("", reqRepoCloudBrainReader, repo.GrampusNotebookShow) + m.Get("/debug", reqWechatBind, cloudbrain.AdminOrJobCreaterRight, repo.GrampusNotebookDebug) + m.Post("/restart", reqWechatBind, cloudbrain.AdminOrJobCreaterRight, repo.GrampusNotebookRestart) m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusStopJob) - m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel) + m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusNotebookDel) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusNotebookNew) @@ -1230,7 +1232,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/train-job", func() { m.Group("/:jobid", func() { m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow) - m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusStopJob) + m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusStopJob) m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel) m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload) m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.GrampusTrainJobVersionNew) diff --git a/services/cloudbrain/cloudbrainTask/sync_status.go b/services/cloudbrain/cloudbrainTask/sync_status.go index 67dc4d3b7..99ef7c419 100644 --- a/services/cloudbrain/cloudbrainTask/sync_status.go +++ b/services/cloudbrain/cloudbrainTask/sync_status.go @@ -3,6 +3,9 @@ package cloudbrainTask import ( "net/http" + "code.gitea.io/gitea/modules/grampus" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/httplib" @@ -57,6 +60,53 @@ func SyncCloudBrainOneStatus(task *models.Cloudbrain) (*models.Cloudbrain, error } +func SyncGrampusNotebookStatus(job *models.Cloudbrain) (*models.Cloudbrain, error) { + result, err := grampus.GetNotebookJob(job.JobID) + if err != nil { + + log.Error("GetJob(%s) failed:%v", job.JobName, err) + + return job, err + } + + if job.StartTime == 0 && result.JobInfo.StartedAt > 0 { + job.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) + } + oldStatus := job.Status + job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) + job.Duration = result.JobInfo.RunSec + job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) + + if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { + job.EndTime = job.StartTime.Add(job.Duration) + } + job.CorrectCreateUnix() + + if len(job.AiCenter) == 0 { + if len(result.JobInfo.Tasks) > 0 { + if len(result.JobInfo.Tasks[0].CenterID) > 0 && len(result.JobInfo.Tasks[0].CenterName) > 0 { + job.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] + } + } + } + + if job.Status != models.GrampusStatusWaiting { + if oldStatus != job.Status { + notification.NotifyChangeCloudbrainStatus(job, oldStatus) + } + job.TrainUrl = result.JobInfo.Tasks[0].CodeUrl + job.DataUrl = result.JobInfo.Tasks[0].DataUrl + err = models.UpdateJob(job) + if err != nil { + log.Error("UpdateJob failed:", err) + return nil, err + } + } + + return job, nil + +} + func isNoteBookReady(task *models.Cloudbrain) bool { if task.JobType != string(models.JobTypeDebug) { return true From ce2e2ecdd4db18ce7ba1ae6f9ce4331da4b70958 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Tue, 22 Nov 2022 15:15:08 +0800 Subject: [PATCH 003/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/resty.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/grampus/resty.go b/modules/grampus/resty.go index 13e6866fc..f615d96f2 100755 --- a/modules/grampus/resty.go +++ b/modules/grampus/resty.go @@ -257,11 +257,16 @@ func GetImages(processorType string, jobType string) (*models.GetGrampusImagesRe retry := 0 + queryType := "TrainJob" + if jobType == string(models.JobTypeDebug) { + queryType = "Notebook" + } + sendjob: _, err := client.R(). SetAuthToken(TOKEN). SetResult(&result). - Get(HOST + urlGetImages + "?processorType=" + processorType + "&jobType=" + jobType) + Get(HOST + urlGetImages + "?processorType=" + processorType + "&trainType=" + queryType) if err != nil { return nil, fmt.Errorf("resty GetImages: %v", err) From 76b4c8494897f4646cbb2103ecbd1f2b0584d040 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Tue, 22 Nov 2022 16:02:03 +0800 Subject: [PATCH 004/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 26e85a1cf..899091a94 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -121,6 +121,21 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram } return datasetGrampus } +func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) []models.GrampusDataset { + var datasetGrampus []models.GrampusDataset + for _, datasetInfo := range datasetInfos { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: datasetInfo.DataLocalPath + "/" + datasetInfo.FullName, + ReadOnly: true, + ContainerPath: "/dataset/" + datasetInfo.Name, + }) + + } + return datasetGrampus +} func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() @@ -136,12 +151,14 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", } } else { - + datasetGrampus = getDatasetGPUGrampus(req.DatasetInfos) codeGrampus = models.GrampusDataset{ - Name: req.CodeName, - Bucket: setting.Bucket, - EndPoint: getEndPoint(), - ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", + Name: req.CodeName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", + ReadOnly: false, + ContainerPath: "/code", } } From eb878ee2abb2e6481f4297d1330f8a24a142af3d Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Tue, 22 Nov 2022 16:33:22 +0800 Subject: [PATCH 005/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 627c161f9..5ac2f9b72 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1580,10 +1580,12 @@ type GrampusNotebookTask struct { } type GrampusDataset struct { - Name string `json:"name"` - Bucket string `json:"bucket"` - EndPoint string `json:"endPoint"` - ObjectKey string `json:"objectKey"` + Name string `json:"name"` + Bucket string `json:"bucket"` + EndPoint string `json:"endPoint"` + ObjectKey string `json:"objectKey"` + ContainerPath string `json:"containerPath"` + ReadOnly bool `json:"readOnly"` } type CreateGrampusJobRequest struct { From d924f0c26441a5dbed103107f4306ae6f4e87756 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 22 Nov 2022 16:48:20 +0800 Subject: [PATCH 006/370] fix issue --- templates/repo/cloudbrain/new.tmpl | 13 ++ templates/repo/grampus/notebook/gpu/new.tmpl | 313 +++++++++++++++++++++++++++ templates/repo/grampus/notebook/npu/new.tmpl | 184 ++++++++++++++++ templates/repo/modelarts/notebook/new.tmpl | 28 ++- 4 files changed, 523 insertions(+), 15 deletions(-) create mode 100644 templates/repo/grampus/notebook/gpu/new.tmpl create mode 100644 templates/repo/grampus/notebook/npu/new.tmpl diff --git a/templates/repo/cloudbrain/new.tmpl b/templates/repo/cloudbrain/new.tmpl index b75e75cdf..91c20eb9f 100755 --- a/templates/repo/cloudbrain/new.tmpl +++ b/templates/repo/cloudbrain/new.tmpl @@ -38,6 +38,19 @@ {{.i18n.Tr "repo.cloudbrain.new"}}
+
- - {{$.i18n.Tr "repo.modelarts.train_job.standard"}} @@ -610,4 +614,5 @@ $('td.ti-text-form-content.spec div').text(specStr); SPEC && $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType)); })(); + console.log({{.version_list_task}}); \ No newline at end of file diff --git a/templates/repo/cloudbrain/trainjob/show.tmpl b/templates/repo/cloudbrain/trainjob/show.tmpl index 618582b41..75cad03b4 100644 --- a/templates/repo/cloudbrain/trainjob/show.tmpl +++ b/templates/repo/cloudbrain/trainjob/show.tmpl @@ -907,5 +907,4 @@ $('td.ti-text-form-content.spec div').text(specStr); SPEC && $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType)); })(); - console.log({{.version_list_task}}); \ No newline at end of file From e419e6fc49e93d6bf942c230188290858d47c8f0 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 23 Nov 2022 17:24:02 +0800 Subject: [PATCH 019/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 108394bec..7de722395 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -949,7 +949,7 @@ func GrampusStopJob(ctx *context.Context) { break } oldStatus := task.Status - task.Status = string(models.GrampusStatusStopped) + task.Status = res.Status if task.EndTime == 0 { task.EndTime = timeutil.TimeStampNow() } From 27773d6de931ab49c91024c682c4a6cd6eee03d3 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 23 Nov 2022 17:30:08 +0800 Subject: [PATCH 020/370] =?UTF-8?q?=E6=99=BA=E7=AE=97=E7=BD=91=E7=BB=9C?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0stopping=E7=8A=B6=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 5 ++++- services/cloudbrain/cloudbrainTask/count.go | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 07b49d8d4..fb0d5d4a8 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -114,6 +114,7 @@ const ( GrampusStatusFailed = "FAILED" GrampusStatusSucceeded = "SUCCEEDED" GrampusStatusStopped = "STOPPED" + GrampusStatusStopping = "STOPPING" GrampusStatusUnknown = "UNKNOWN" GrampusStatusWaiting = "WAITING" @@ -1545,7 +1546,9 @@ type GrampusNotebookRestartResponse struct { type GrampusStopJobResponse struct { GrampusResult - StoppedAt int64 `json:"stoppedAt"` + StoppedAt int64 `json:"stoppedAt"` + ID string `json:"id"` + Status string `json:"status"` } type GrampusTasks struct { diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index 4ae742c3a..372afc7d9 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -16,7 +16,7 @@ type StatusInfo struct { var CloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} var CloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} -var GrampusNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} +var GrampusNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning, models.GrampusStatusStopping} var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { CloudBrainTypes: []int{models.TypeCloudBrainOne}, JobType: []models.JobType{models.JobTypeDebug}, From 6e1416f5a3a71e894e4fb1c5bbcdeedd8c9ecf26 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 23 Nov 2022 17:41:39 +0800 Subject: [PATCH 021/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 7de722395..81e2f5b4b 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -949,7 +949,7 @@ func GrampusStopJob(ctx *context.Context) { break } oldStatus := task.Status - task.Status = res.Status + task.Status = grampus.TransTrainJobStatus(res.Status) if task.EndTime == 0 { task.EndTime = timeutil.TimeStampNow() } From e2a3f3ced62bf6a0f47892f3c9aae474db8c57e0 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Wed, 23 Nov 2022 18:30:30 +0800 Subject: [PATCH 022/370] fix issue --- templates/repo/cloudbrain/show.tmpl | 1 - templates/repo/grampus/notebook/npu/new.tmpl | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/cloudbrain/show.tmpl b/templates/repo/cloudbrain/show.tmpl index cb6d69402..694c79c3c 100755 --- a/templates/repo/cloudbrain/show.tmpl +++ b/templates/repo/cloudbrain/show.tmpl @@ -614,5 +614,4 @@ $('td.ti-text-form-content.spec div').text(specStr); SPEC && $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType)); })(); - console.log({{.version_list_task}}); \ No newline at end of file diff --git a/templates/repo/grampus/notebook/npu/new.tmpl b/templates/repo/grampus/notebook/npu/new.tmpl index f17815133..6241c5912 100644 --- a/templates/repo/grampus/notebook/npu/new.tmpl +++ b/templates/repo/grampus/notebook/npu/new.tmpl @@ -210,4 +210,5 @@ shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}}, }); })(); + console.log({{ .Spec }}); From 9d1b64edd96a1efdb818eb5272c3489b44f6fd21 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Thu, 24 Nov 2022 08:53:03 +0800 Subject: [PATCH 023/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 81e2f5b4b..894650704 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -949,7 +949,7 @@ func GrampusStopJob(ctx *context.Context) { break } oldStatus := task.Status - task.Status = grampus.TransTrainJobStatus(res.Status) + task.Status = getStopJobResponseStatus(res) if task.EndTime == 0 { task.EndTime = timeutil.TimeStampNow() } @@ -978,6 +978,14 @@ func GrampusStopJob(ctx *context.Context) { }) } +func getStopJobResponseStatus(res *models.GrampusStopJobResponse) string { + newStatus := models.GrampusStatusStopping + if res.Status != "" { + newStatus = grampus.TransTrainJobStatus(res.Status) + } + return newStatus +} + func GrampusNotebookDel(ctx *context.Context) { var listType = ctx.Query("listType") if err := deleteGrampusJob(ctx); err != nil { @@ -1039,19 +1047,23 @@ func deleteGrampusJob(ctx *context.Context) error { return nil } +type NotebookDataset struct { + DatasetUrl string `json:"dataset_url"` +} + func GrampusNotebookShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true var task *models.Cloudbrain - task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid")) + task, err := models.GetCloudbrainByIDWithDeleted(ctx.Params(":id")) if err != nil { - log.Error("GetCloudbrainByJobID failed:" + err.Error()) + log.Error("GetCloudbrainByID failed:" + err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } task.ContainerIp = "" - if task.DeletedAt.IsZero() { //normal record + if task.DeletedAt.IsZero() && cloudbrainTask.IsTaskNotStop(task) { //normal record result, err := grampus.GetNotebookJob(task.JobID) if err != nil { log.Error("GetJob failed:" + err.Error()) @@ -1116,9 +1128,13 @@ func GrampusNotebookShow(ctx *context.Context) { } } prepareSpec4Show(ctx, task) - ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) + datasetList := make([]NotebookDataset, 0) + _ = json.Unmarshal([]byte(task.DataUrl), &datasetList) + + ctx.Data["datasetDownload"] = datasetList ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) + ctx.Data[""] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) ctx.HTML(http.StatusOK, tplGrampusNotebookShow) } From c6b93a7ce66b338d8e8c7020ab23c1a458e6dde5 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Thu, 24 Nov 2022 09:26:42 +0800 Subject: [PATCH 024/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 894650704..8671e4437 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1130,11 +1130,10 @@ func GrampusNotebookShow(ctx *context.Context) { prepareSpec4Show(ctx, task) datasetList := make([]NotebookDataset, 0) _ = json.Unmarshal([]byte(task.DataUrl), &datasetList) - + ctx.Data["task"] = task ctx.Data["datasetDownload"] = datasetList ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) - ctx.Data[""] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) ctx.HTML(http.StatusOK, tplGrampusNotebookShow) } From 98abb4eff61c217c1c6cd905179087a6970ecbf3 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Thu, 24 Nov 2022 10:36:34 +0800 Subject: [PATCH 025/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 8671e4437..fd5d5acd2 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1127,16 +1127,48 @@ func GrampusNotebookShow(ctx *context.Context) { task.Parameters = "" } } + user, err := models.GetUserByID(task.UserID) + if err == nil { + task.User = user + } + prepareSpec4Show(ctx, task) - datasetList := make([]NotebookDataset, 0) - _ = json.Unmarshal([]byte(task.DataUrl), &datasetList) + ctx.Data["task"] = task - ctx.Data["datasetDownload"] = datasetList + ctx.Data["datasetDownload"] = getDatasetDownloadInfo(ctx, task) ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) ctx.HTML(http.StatusOK, tplGrampusNotebookShow) } +func getDatasetDownloadInfo(ctx *context.Context, task *models.Cloudbrain) []models.DatasetDownload { + datasetDownload := make([]models.DatasetDownload, 0) + if ctx.IsSigned { + if task.Uuid != "" && task.UserID == ctx.User.ID { + datasetDownload = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, true) + } + } + datasetObsUrlList := make([]NotebookDataset, 0) + _ = json.Unmarshal([]byte(task.DataUrl), &datasetObsUrlList) + + for _, datasetInfo := range datasetDownload { + match := false + for _, datasetObs := range datasetObsUrlList { + + if strings.Contains(datasetObs.DatasetUrl, datasetInfo.DatasetName) { + datasetInfo.DatasetDownloadLink = datasetObs.DatasetUrl + match = true + break + } + } + if !match { + datasetInfo.DatasetDownloadLink = "" + } + + } + return datasetDownload +} + func GrampusTrainJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true From 903caf2807021f879614af174e3c2f076a0eda02 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Thu, 24 Nov 2022 10:56:39 +0800 Subject: [PATCH 026/370] fix issue --- modules/templates/helper.go | 9 + templates/repo/cloudbrain/show.tmpl | 204 ----------------- templates/repo/debugjob/index.tmpl | 2 +- templates/repo/grampus/notebook/show.tmpl | 340 ++++++++++++++++++++++++++++ templates/repo/modelarts/notebook/show.tmpl | 278 +---------------------- web_src/less/openi.less | 125 ++++++++++ 6 files changed, 482 insertions(+), 476 deletions(-) create mode 100644 templates/repo/grampus/notebook/show.tmpl diff --git a/modules/templates/helper.go b/modules/templates/helper.go index 3e424454b..b54b74fd1 100755 --- a/modules/templates/helper.go +++ b/modules/templates/helper.go @@ -47,6 +47,7 @@ const ( REF_TYPE_BRANCH = "branch" REF_TYPE_TAG = "tag" REF_TYPE_PATTERN = "(refs/heads/|refs/tags/)" + DURATION_STR_ZERO = "00:00:00" ) // Used from static.go && dynamic.go @@ -109,6 +110,7 @@ func NewFuncMap() []template.FuncMap { "AttachmentStatus": dataset.GetStatusText, "IsShowDataSetOfCurrentRepo": dataset.IsShowDataSetOfCurrentRepo, "TimeSinceUnixShort": timeutil.TimeSinceUnixShort, + "ConvertDurationToStr": ConvertDurationToStr, "RawTimeSince": timeutil.RawTimeSince, "FileSize": base.FileSize, "PrettyNumber": base.PrettyNumber, @@ -362,6 +364,7 @@ func NewTextFuncMap() []texttmpl.FuncMap { "TimeSinceUnix": timeutil.TimeSinceUnix, "TimeSinceUnix1": timeutil.TimeSinceUnix1, "TimeSinceUnixShort": timeutil.TimeSinceUnixShort, + "ConvertDurationToStr": ConvertDurationToStr, "RawTimeSince": timeutil.RawTimeSince, "AttachmentResourceType": dataset.GetResourceType, "AttachmentStatus": dataset.GetStatusText, @@ -801,3 +804,9 @@ func MB2GB(size int) string { } return s } +func ConvertDurationToStr(duration int64) string { + if duration <= 0 { + return DURATION_STR_ZERO + } + return util.AddZero(duration/3600) + ":" + util.AddZero(duration%3600/60) + ":" + util.AddZero(duration%60) +} diff --git a/templates/repo/cloudbrain/show.tmpl b/templates/repo/cloudbrain/show.tmpl index 694c79c3c..220cfcd61 100755 --- a/templates/repo/cloudbrain/show.tmpl +++ b/templates/repo/cloudbrain/show.tmpl @@ -1,208 +1,4 @@ {{template "base/head" .}} - - -
diff --git a/templates/repo/debugjob/index.tmpl b/templates/repo/debugjob/index.tmpl index 190c023a3..eff8e21e7 100755 --- a/templates/repo/debugjob/index.tmpl +++ b/templates/repo/debugjob/index.tmpl @@ -211,7 +211,7 @@
{{.DisplayJobName}} diff --git a/templates/repo/grampus/notebook/show.tmpl b/templates/repo/grampus/notebook/show.tmpl new file mode 100644 index 000000000..7aba31d6b --- /dev/null +++ b/templates/repo/grampus/notebook/show.tmpl @@ -0,0 +1,340 @@ +{{template "base/head" .}} +
+ {{template "repo/header" .}} +
+

+ +

+ {{with .task}} +
+
+
+
+ + + +
+ + {{TimeSinceUnix1 .CreatedUnix}} + + {{$.i18n.Tr "repo.modelarts.status"}}: + {{.Status}} + + {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}: + {{ConvertDurationToStr .Duration}} + + + +
+
+
+
+
+
+
+
+ +
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ {{$.i18n.Tr "repo.cloudbrain_task"}} + +
+ {{.DisplayJobName}} +
+
+ {{$.i18n.Tr "repo.modelarts.status"}} + +
+ {{.Status}} +
+
+ {{$.i18n.Tr "repo.cloudbrain_creator"}} + +
+ {{.User.Name}} +
+
+ {{$.i18n.Tr "repo.modelarts.code_version"}} + +
+ {{if .BranchName}} + {{.BranchName}} + {{else}} + -- + {{end}} +
+
+ {{$.i18n.Tr "repo.modelarts.computing_resources"}} + +
+ {{.ComputeResource}} +
+
+ {{$.i18n.Tr "repo.modelarts.createtime"}} + +
+ {{TimeSinceUnix1 .CreatedUnix}} +
+
+ {{$.i18n.Tr "cloudbrain.description"}} + +
+ {{.Description}} +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ {{$.i18n.Tr "cloudbrain.mirror"}} + +
+ + {{.EngineName}} + +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.standard"}} + +
+
+ {{$.i18n.Tr "repo.cloudbrain.time.starttime"}} + +
+
+ {{if not (eq .StartTime 0)}} + {{TimeSinceUnix1 .StartTime}} + {{else}} + -- + {{end}} +
+
+
+ {{$.i18n.Tr "repo.cloudbrain.time.endtime"}} + +
+ {{if not (eq .EndTime 0)}} + {{TimeSinceUnix1 .EndTime}} + {{else}} + -- + {{end}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}} + +
+ {{if not (eq .Duration 0)}} + {{ConvertDurationToStr .Duration}} + {{else}} + -- + {{end}} +
+
+ 代码obs地址 + +
+ + + {{if .TrainUrl}} + {{.TrainUrl}} + {{else}} + -- + {{end}} + + +
+
+
+
+ + +
+
+ + + + + + + + {{range $.datasetDownload}} + + + + + + {{end}} + +
{{$.i18n.Tr "dataset.file"}}{{$.i18n.Tr "dataset.download_url"}}{{$.i18n.Tr "dataset.download_oper"}}
+ {{if eq .IsDelete true}} + {{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}}) + {{else}} + {{.DatasetName}} + {{end}} +
{{.DatasetDownloadLink}}
{{$.i18n.Tr "dataset.download_copy"}}
+
+
+
+ + +
+
+
+ {{end}} + {{template "base/paginate" .}} +
+ +
+ +
+ + +
+{{template "base/footer" .}} + + diff --git a/templates/repo/modelarts/notebook/show.tmpl b/templates/repo/modelarts/notebook/show.tmpl index 57d74b132..45d051682 100755 --- a/templates/repo/modelarts/notebook/show.tmpl +++ b/templates/repo/modelarts/notebook/show.tmpl @@ -1,225 +1,4 @@ {{template "base/head" .}} -
{{template "repo/header" .}}
@@ -308,7 +87,7 @@ -
+
{{.User.Name}}
@@ -336,18 +115,6 @@
- {{if eq .Type 2}} - - - 代码分支 - - -
- {{.BranchName}} -
- - - {{end}} {{$.i18n.Tr "cloudbrain.description"}} @@ -375,13 +142,13 @@
- {{if .Image}}{{.Image}}{{else}}{{.EngineName}}{{end}} + {{.Image}}
@@ -442,37 +209,8 @@
- {{if eq .Type 2}} - - - 代码地址 - - -
- {{if .TrainUrl}} - {{.TrainUrl}} - {{else}} - -- - {{end}} -
- - - - - 数据集地址 - - -
- {{if .DataUrl}} - {{.DataUrl}} - {{else}} - -- - {{end}} -
- - - {{end}} +
@@ -480,9 +218,7 @@
-
- {{if ne .Type 2}} @@ -505,10 +241,11 @@ {{end}}
{{$.i18n.Tr "dataset.file"}}
- {{end}} -
+
+ +
@@ -563,5 +300,4 @@ }); $('td.ti-text-form-content.spec div').text(specStr); })(); - console.log({{.task}}) diff --git a/web_src/less/openi.less b/web_src/less/openi.less index fe002ceb7..973ec1961 100644 --- a/web_src/less/openi.less +++ b/web_src/less/openi.less @@ -1301,3 +1301,128 @@ i.SUCCEEDED { max-height: 500px; overflow: auto; } + +.border-according { + border: 1px solid #dfe1e6; + .padding0 { + padding: 0 !important; + } + .according-panel-heading { + box-sizing: border-box; + padding: 8px 16px; + color: #252b3a; + background-color: #f2f5fc; + line-height: 1.5; + cursor: pointer; + -moz-user-select: none; + -webkit-user-select: none; + -ms-user-select: none; + -khtml-user-select: none; + user-select: none; + .accordion-panel-title { + margin-top: 0; + margin-bottom: 0; + color: #252b3a; + .accordion-panel-title-content { + vertical-align: middle; + display: inline-block; + width: calc(100% - 32px); + cursor: default; + .redo-color { + color: #3291F8; + } + .acc-margin-bottom { + margin-bottom: 5px; + } + .title_text { + font-size: 12px; + } + .ac-display-inblock { + display: inline-block; + .cti-mgRight-sm { + margin-right: 8px; + } + } + .uc-accordionTitle-black { + color: #333; + } + } + } + } + .content-pad { + padding: 15px 35px; + .dataset_nowrap_two_line{ + word-wrap: break-word; + word-break: break-all; + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-box-orient: vertical; + line-clamp: 2; + -webkit-line-clamp: 2; + text-overflow: -o-ellipsis-lastline; + max-height: 50px; + } + .tab_2_content { + margin-left: 10px; + .ac-grid { + display: block; + *zoom: 1; + .ac-grid-col { + width: 50%; + float: left; + .ti-form { + text-align: left; + max-width: 100%; + vertical-align: middle; + tbody { + font-size: 12px; + vertical-align: inherit; + tr { + vertical-align: inherit; + .td{ + vertical-align: top; + white-space: normal; + padding: 0; + } + .th{padding: 0;} + .ti-text-form-label { + + padding-bottom: 20px; + padding-right: 20px; + color: #8a8e99; + font-size: 12px; + white-space: nowrap !important; + width: 80px; + line-height: 30px; + } + .text-width80 { + width: 100px; + line-height: 30px; + } + .ti-text-form-content { + line-height: 30px; + padding-bottom: 20px; + .text-span { + width: 450px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + .text-span-new { + width: 800px; + overflow: hidden; + text-overflow: ellipsis; + height: 20%; + word-break: break-all; + } + } + } + } + } + } + } + } + } +} + From 6b5c64e8d9a5e3869800a75bce23607e57a41847 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Thu, 24 Nov 2022 11:07:49 +0800 Subject: [PATCH 027/370] fix issue --- templates/repo/cloudbrain/benchmark/show.tmpl | 196 ------------------------ templates/repo/cloudbrain/inference/show.tmpl | 195 ----------------------- templates/repo/cloudbrain/trainjob/show.tmpl | 195 ----------------------- templates/repo/grampus/trainjob/show.tmpl | 189 ----------------------- templates/repo/modelarts/inferencejob/show.tmpl | 155 ------------------- templates/repo/modelarts/trainjob/show.tmpl | 195 ----------------------- 6 files changed, 1125 deletions(-) diff --git a/templates/repo/cloudbrain/benchmark/show.tmpl b/templates/repo/cloudbrain/benchmark/show.tmpl index faef41a8b..c24b88a12 100755 --- a/templates/repo/cloudbrain/benchmark/show.tmpl +++ b/templates/repo/cloudbrain/benchmark/show.tmpl @@ -1,200 +1,4 @@ {{template "base/head" .}} -
diff --git a/templates/repo/cloudbrain/inference/show.tmpl b/templates/repo/cloudbrain/inference/show.tmpl index aee08d659..d41d48344 100644 --- a/templates/repo/cloudbrain/inference/show.tmpl +++ b/templates/repo/cloudbrain/inference/show.tmpl @@ -1,200 +1,5 @@ {{template "base/head" .}}
@@ -463,7 +267,7 @@
- +
From ee53509a89807c175796c486946f23c2424d3a40 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Thu, 24 Nov 2022 11:31:37 +0800 Subject: [PATCH 029/370] fix issue --- templates/repo/modelmanage/convertshowinfo.tmpl | 160 ++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/templates/repo/modelmanage/convertshowinfo.tmpl b/templates/repo/modelmanage/convertshowinfo.tmpl index 0aafadc36..e50e4c557 100644 --- a/templates/repo/modelmanage/convertshowinfo.tmpl +++ b/templates/repo/modelmanage/convertshowinfo.tmpl @@ -1,5 +1,165 @@ {{template "base/head" .}} - {{template "custom/global_mask" .}}
@@ -33,7 +15,6 @@ {{template "custom/alert_cb" .}} {{.CsrfTokenHtml}} -

{{.i18n.Tr "repo.cloudbrain.new"}} @@ -84,30 +65,6 @@ placeholder="{{.i18n.Tr "cloudbrain.task_name"}}" value="{{.display_job_name}}" tabindex="3" autofocus required maxlength="255" onkeyup="this.value=this.value.replace(/[, ]/g,'')">

- -
- - -
- -
- - -
- -
- -
-
-
-
-
- - -
-
- - -
-
- - -
-
- - -
-
- - -
- + {{else}} +
{{end}}
From 88ab7b086af9da0157d44e1f6badbff1a7d02443 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Tue, 29 Nov 2022 17:28:04 +0800 Subject: [PATCH 066/370] fix issue --- templates/repo/debugjob/index.tmpl | 1 - templates/repo/grampus/notebook/gpu/new.tmpl | 7 +++---- templates/repo/grampus/notebook/npu/new.tmpl | 1 - templates/repo/grampus/notebook/show.tmpl | 3 --- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/templates/repo/debugjob/index.tmpl b/templates/repo/debugjob/index.tmpl index 0d6dc5c94..b5892c363 100755 --- a/templates/repo/debugjob/index.tmpl +++ b/templates/repo/debugjob/index.tmpl @@ -433,7 +433,6 @@ {{template "base/footer" .}} diff --git a/templates/repo/grampus/notebook/npu/new.tmpl b/templates/repo/grampus/notebook/npu/new.tmpl index 1857bf751..2f2232a97 100644 --- a/templates/repo/grampus/notebook/npu/new.tmpl +++ b/templates/repo/grampus/notebook/npu/new.tmpl @@ -209,5 +209,4 @@ shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}}, }); })(); - console.log({{ .Spec }}); diff --git a/templates/repo/grampus/notebook/show.tmpl b/templates/repo/grampus/notebook/show.tmpl index 5af5201ca..f31252a5f 100644 --- a/templates/repo/grampus/notebook/show.tmpl +++ b/templates/repo/grampus/notebook/show.tmpl @@ -348,7 +348,4 @@ }); $('td.ti-text-form-content.spec div').text(specStr); })(); - console.log("---------=-sdfsdfsd=") - console.log({{.task}}) - From 6ca1e78dd14189f3ed0628bb6e34f53c99d55784 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 30 Nov 2022 08:56:35 +0800 Subject: [PATCH 067/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index be7b2fdd4..90b29dbcf 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -308,6 +308,10 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeResource) ctx.Data["NotStopTaskCount"] = NotStopTaskCount + ctx.Data["code_path"] = cloudbrain.CodeMountPath + ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath + ctx.Data["model_path"] = cloudbrain.ModelMountPath + return nil } From ca7655302ccdf85be91d41e72181ae06b6e72d44 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 30 Nov 2022 09:35:57 +0800 Subject: [PATCH 068/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 2 +- services/cloudbrain/cloudbrainTask/sync_status.go | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index fb0d5d4a8..9cd4c428b 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -182,7 +182,7 @@ type Cloudbrain struct { BranchName string //分支名称 Parameters string //传给modelarts的param参数 BootFile string //启动文件 - DataUrl string //数据集的obs路径 + DataUrl string `xorm:"varchar(3500)"` //数据集的obs路径 LogUrl string //日志输出的obs路径 PreVersionId int64 //父版本的版本id FlavorCode string //modelarts上的规格id diff --git a/services/cloudbrain/cloudbrainTask/sync_status.go b/services/cloudbrain/cloudbrainTask/sync_status.go index ba84d5fa0..f9c4e2516 100644 --- a/services/cloudbrain/cloudbrainTask/sync_status.go +++ b/services/cloudbrain/cloudbrainTask/sync_status.go @@ -97,8 +97,10 @@ func SyncGrampusNotebookStatus(job *models.Cloudbrain) (*models.Cloudbrain, erro if oldStatus != job.Status { notification.NotifyChangeCloudbrainStatus(job, oldStatus) } - job.TrainUrl = result.JobInfo.Tasks[0].CodeUrl - job.DataUrl = result.JobInfo.Tasks[0].DataUrl + if job.ComputeResource == models.NPUResource { + job.TrainUrl = result.JobInfo.Tasks[0].CodeUrl + job.DataUrl = result.JobInfo.Tasks[0].DataUrl + } err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) From b7d03ff367bfae835287fc9a976abe042d265f2d Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Wed, 30 Nov 2022 10:32:42 +0800 Subject: [PATCH 069/370] #3188 add active user list query api --- models/user.go | 32 ++++++++++++++++++++++++++++ models/user_business_analysis.go | 6 ++++++ routers/home.go | 18 ++++++++++++++++ routers/routes/routes.go | 7 +++++- services/repository/square.go | 46 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 1 deletion(-) diff --git a/models/user.go b/models/user.go index b21858e37..ec636e000 100755 --- a/models/user.go +++ b/models/user.go @@ -198,6 +198,38 @@ type SearchOrganizationsOptions struct { All bool } +type User4Front struct { + ID int64 + LowerName string `xorm:"UNIQUE NOT NULL"` + Name string `xorm:"UNIQUE NOT NULL"` + FullName string + Email string `xorm:"NOT NULL"` + Language string `xorm:"VARCHAR(5)"` + Description string + RelAvatarLink string + CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` + UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` +} + +func (u *User) ToFrontFormat() *User4Front { + uf := &User4Front{ + ID: u.ID, + LowerName: u.LowerName, + Name: u.Name, + FullName: u.FullName, + Email: u.Email, + Language: u.Language, + Description: u.Description, + CreatedUnix: u.CreatedUnix, + UpdatedUnix: u.UpdatedUnix, + } + if !u.KeepEmailPrivate { + uf.Email = u.Email + } + uf.RelAvatarLink = u.RelAvatarLink() + return uf +} + // GenerateRandomAvatar generates a random avatar for user. func (u *User) IsBindWechat() bool { return u.WechatOpenId != "" diff --git a/models/user_business_analysis.go b/models/user_business_analysis.go index 394c24825..5ee79f16e 100644 --- a/models/user_business_analysis.go +++ b/models/user_business_analysis.go @@ -2424,3 +2424,9 @@ func GetContentFromPromote(url string) (string, error) { allLineStr := string(bytes) return allLineStr, nil } + +func QueryLast30DaysHighestIndexUsers(size int) ([]int64, error) { + userIds := make([]int64, 0) + err := xStatistic.Table("user_business_analysis_last30_day").Cols("id").OrderBy("user_index").Limit(size).Find(&userIds) + return userIds, err +} diff --git a/routers/home.go b/routers/home.go index 7b680bca0..f8363bb1a 100755 --- a/routers/home.go +++ b/routers/home.go @@ -317,6 +317,24 @@ func RepoSquare(ctx *context.Context) { resultMap["Repos"] = result ctx.JSON(http.StatusOK, response.SuccessWithData(resultMap)) } + +func ActiveUser(ctx *context.Context) { + var err error + var currentUserId int64 + if ctx.User != nil { + currentUserId = ctx.User.ID + } + result, err := repository.GetActiveUser4Square(currentUserId) + if err != nil { + log.Error("ActiveUser err. %v", err) + ctx.JSON(http.StatusOK, response.Success()) + return + } + resultMap := make(map[string]interface{}, 0) + resultMap["Users"] = result + ctx.JSON(http.StatusOK, response.SuccessWithData(resultMap)) +} + func RepoFind(ctx *context.Context) { keyword := strings.Trim(ctx.Query("q"), " ") topic := strings.Trim(ctx.Query("topic"), " ") diff --git a/routers/routes/routes.go b/routers/routes/routes.go index e902d10be..31d72a96b 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -372,7 +372,12 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/repos", func() { m.Get("", routers.ExploreRepos) - m.Get("/square", routers.RepoSquare) + m.Group("/square", func() { + m.Get("/tab", routers.RepoSquare) + m.Get("/active-user", routers.ActiveUser) + m.Get("/active-org", routers.RepoSquare) + }) + m.Get("/search", routers.RepoFind) }) m.Get("/datasets", routers.ExploreDatasets) diff --git a/services/repository/square.go b/services/repository/square.go index cb7b9e4db..4465cbe9b 100644 --- a/services/repository/square.go +++ b/services/repository/square.go @@ -144,3 +144,49 @@ func FindRepos(opts FindReposOptions) (*models.FindReposResponse, error) { PageSize: opts.PageSize, }, nil } + +type ActiveUser struct { + User *models.User4Front + Followed bool + ShowButton bool +} + +func GetActiveUser4Square(currentUserId int64) ([]*ActiveUser, error) { + result := make([]*ActiveUser, 0) + userIds, err := models.QueryLast30DaysHighestIndexUsers(10) + if err != nil { + log.Error("ActiveUser err. %v", err) + return result, err + } + if len(userIds) == 0 { + return result, nil + } + + users, err := models.GetUsersByIDs(userIds) + if err != nil { + return result, nil + } + usersMap := make(map[int64]*models.User) + for _, v := range users { + usersMap[v.ID] = v + } + + for i := 0; i < len(userIds); i++ { + userId := userIds[i] + user := usersMap[userId] + if user == nil { + continue + } + isFollowed := false + if currentUserId != 0 { + isFollowed = models.IsFollowing(currentUserId, userId) + } + a := &ActiveUser{ + Followed: isFollowed, + User: user.ToFrontFormat(), + ShowButton: currentUserId != userId, + } + result = append(result, a) + } + return result, nil +} From 34a41483950b8f35be604227ccdf25fcc69eb887 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 30 Nov 2022 10:41:10 +0800 Subject: [PATCH 070/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 17 +++++++++++++---- routers/repo/grampus.go | 2 -- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index b81daaa02..fe8fb75ae 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -2,6 +2,7 @@ package grampus import ( "encoding/json" + "fmt" "strings" "code.gitea.io/gitea/models" @@ -29,6 +30,7 @@ const ( BucketRemote = "grampus" RemoteModelPath = "/output/" + models.ModelSuffix autoStopDurationMs = 4 * 60 * 60 * 1000 + CommandGpuDebug = "%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" ) var ( @@ -121,8 +123,9 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram } return datasetGrampus } -func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) []models.GrampusDataset { +func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models.GrampusDataset, string) { var datasetGrampus []models.GrampusDataset + var command = "" for _, datasetInfo := range datasetInfos { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ Name: datasetInfo.FullName, @@ -130,11 +133,13 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) []models.G EndPoint: setting.Attachment.Minio.Endpoint, ObjectKey: datasetInfo.DataLocalPath, ReadOnly: true, - ContainerPath: "/dataset/" + datasetInfo.Name, + ContainerPath: "/dataset1/" + datasetInfo.Name, }) + command += "cp /dataset1/" + datasetInfo.Name + " /dataset/" + datasetInfo.FullName + ";" + } - return datasetGrampus + return datasetGrampus, command } func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) { @@ -142,6 +147,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job var datasetGrampus []models.GrampusDataset var codeGrampus models.GrampusDataset + var cpCommand string imageUrl := req.ImageUrl if ProcessorTypeNPU == req.ProcessType { datasetGrampus = getDatasetGrampus(req.DatasetInfos) @@ -152,8 +158,9 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", } imageUrl = "" + req.Command = "" } else { - datasetGrampus = getDatasetGPUGrampus(req.DatasetInfos) + datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) codeGrampus = models.GrampusDataset{ Name: req.CodeName, Bucket: setting.Attachment.Minio.Bucket, @@ -162,6 +169,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job ReadOnly: false, ContainerPath: "/code", } + req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand) } @@ -177,6 +185,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job Code: codeGrampus, AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, + Command: req.Command, }, }, }) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 90b29dbcf..1630880ed 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -234,14 +234,12 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook } commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) - command := "" req := &grampus.GenerateNotebookJobReq{ JobName: jobName, DisplayJobName: displayJobName, ComputeResource: computeSource, ProcessType: processType, - Command: command, ImageUrl: image, ImageId: form.ImageID, Description: description, From 670e21dd15b0d49f195c6abfbe81b3cb4e199942 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 30 Nov 2022 10:47:03 +0800 Subject: [PATCH 071/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index fe8fb75ae..fbdbf47f8 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -126,7 +126,7 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models.GrampusDataset, string) { var datasetGrampus []models.GrampusDataset var command = "" - for _, datasetInfo := range datasetInfos { + for uuid, datasetInfo := range datasetInfos { datasetGrampus = append(datasetGrampus, models.GrampusDataset{ Name: datasetInfo.FullName, Bucket: setting.Attachment.Minio.Bucket, @@ -136,7 +136,7 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. ContainerPath: "/dataset1/" + datasetInfo.Name, }) - command += "cp /dataset1/" + datasetInfo.Name + " /dataset/" + datasetInfo.FullName + ";" + command += "cp /dataset1/" + datasetInfo.Name + "/" + uuid + " /dataset/" + datasetInfo.FullName + ";" } return datasetGrampus, command From 14e21bfdf4c37da31416cfca732be550e6957a6b Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 30 Nov 2022 10:52:32 +0800 Subject: [PATCH 072/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 1 + 1 file changed, 1 insertion(+) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 9cd4c428b..604cccfeb 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1581,6 +1581,7 @@ type GrampusNotebookTask struct { Token string `json:"token"` Url string `json:"url"` Status string `json:"status"` + Command string `json:"command"` } type GrampusDataset struct { From 76d780cf8c2c6bcca72b0daa5a4478e9dfbcaa51 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 30 Nov 2022 11:08:47 +0800 Subject: [PATCH 073/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index fbdbf47f8..bf5b2ef9a 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -136,7 +136,7 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. ContainerPath: "/dataset1/" + datasetInfo.Name, }) - command += "cp /dataset1/" + datasetInfo.Name + "/" + uuid + " /dataset/" + datasetInfo.FullName + ";" + command += "cp /dataset1/'" + datasetInfo.Name + "'/" + uuid + " /dataset/'" + datasetInfo.FullName + "';" } return datasetGrampus, command From 788f8807fcbcf6bf5ad33d26a62cc99b938a3f33 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Wed, 30 Nov 2022 11:14:18 +0800 Subject: [PATCH 074/370] #3188 add active org list query api --- models/user.go | 2 ++ routers/home.go | 11 +++++++++++ routers/routes/routes.go | 2 +- services/repository/square.go | 12 ++++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/models/user.go b/models/user.go index ec636e000..e4eed1f9a 100755 --- a/models/user.go +++ b/models/user.go @@ -207,6 +207,7 @@ type User4Front struct { Language string `xorm:"VARCHAR(5)"` Description string RelAvatarLink string + NumMembers int CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` } @@ -222,6 +223,7 @@ func (u *User) ToFrontFormat() *User4Front { Description: u.Description, CreatedUnix: u.CreatedUnix, UpdatedUnix: u.UpdatedUnix, + NumMembers: u.NumMembers, } if !u.KeepEmailPrivate { uf.Email = u.Email diff --git a/routers/home.go b/routers/home.go index f8363bb1a..a1043655e 100755 --- a/routers/home.go +++ b/routers/home.go @@ -334,6 +334,17 @@ func ActiveUser(ctx *context.Context) { resultMap["Users"] = result ctx.JSON(http.StatusOK, response.SuccessWithData(resultMap)) } +func ActiveOrg(ctx *context.Context) { + result, err := repository.GetActiveOrgs() + if err != nil { + log.Error("ActiveOrg err. %v", err) + ctx.JSON(http.StatusOK, response.Success()) + return + } + resultMap := make(map[string]interface{}, 0) + resultMap["Orgs"] = result + ctx.JSON(http.StatusOK, response.SuccessWithData(resultMap)) +} func RepoFind(ctx *context.Context) { keyword := strings.Trim(ctx.Query("q"), " ") diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 31d72a96b..8f60153db 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -375,7 +375,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/square", func() { m.Get("/tab", routers.RepoSquare) m.Get("/active-user", routers.ActiveUser) - m.Get("/active-org", routers.RepoSquare) + m.Get("/active-org", routers.ActiveOrg) }) m.Get("/search", routers.RepoFind) diff --git a/services/repository/square.go b/services/repository/square.go index 4465cbe9b..55858db81 100644 --- a/services/repository/square.go +++ b/services/repository/square.go @@ -190,3 +190,15 @@ func GetActiveUser4Square(currentUserId int64) ([]*ActiveUser, error) { } return result, nil } + +func GetActiveOrgs() ([]*models.User4Front, error) { + orgScores, err := models.FindTopNOpenIOrgs(10) + if err != nil { + return nil, err + } + orgs := make([]*models.User4Front, len(orgScores)) + for i, v := range orgScores { + orgs[i] = v.ToFrontFormat() + } + return orgs, nil +} From 7ef380ffd64aed3d42b124b0a81ca0bd555dea95 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 30 Nov 2022 11:42:56 +0800 Subject: [PATCH 075/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/auth/grampus.go | 23 ++++++++++++++--------- modules/auth/modelarts.go | 19 ++++++++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/modules/auth/grampus.go b/modules/auth/grampus.go index a50613613..f8a238124 100755 --- a/modules/auth/grampus.go +++ b/modules/auth/grampus.go @@ -31,15 +31,20 @@ func (f *CreateGrampusTrainJobForm) Validate(ctx *macaron.Context, errs binding. } type CreateGrampusNotebookForm struct { - Type int `form:"type"` - DisplayJobName string `form:"display_job_name" binding:"Required"` - Attachment string `form:"attachment"` - ImageID string `form:"image_id" binding:"Required"` - Description string `form:"description"` - BranchName string `form:"branch_name" binding:"Required"` - Image string `form:"image" binding:"Required"` - DatasetName string `form:"dataset_name"` - SpecId int64 `form:"spec_id" binding:"Required"` + Type int `form:"type"` + DisplayJobName string `form:"display_job_name" binding:"Required"` + Attachment string `form:"attachment"` + ImageID string `form:"image_id" binding:"Required"` + Description string `form:"description"` + BranchName string `form:"branch_name" binding:"Required"` + Image string `form:"image" binding:"Required"` + DatasetName string `form:"dataset_name"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` + SpecId int64 `form:"spec_id" binding:"Required"` } func (f *CreateGrampusNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 0221c51d8..0061648ce 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -16,13 +16,18 @@ func (f *CreateModelArtsForm) Validate(ctx *macaron.Context, errs binding.Errors } type CreateModelArtsNotebookForm struct { - DisplayJobName string `form:"display_job_name" binding:"Required"` - JobName string `form:"job_name" binding:"Required"` - Attachment string `form:"attachment"` - Description string `form:"description"` - Flavor string `form:"flavor" binding:"Required"` - ImageId string `form:"image_id" binding:"Required"` - SpecId int64 `form:"spec_id" binding:"Required"` + DisplayJobName string `form:"display_job_name" binding:"Required"` + JobName string `form:"job_name" binding:"Required"` + Attachment string `form:"attachment"` + Description string `form:"description"` + Flavor string `form:"flavor" binding:"Required"` + ImageId string `form:"image_id" binding:"Required"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` + SpecId int64 `form:"spec_id" binding:"Required"` } func (f *CreateModelArtsNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { From 3fcafa1dae8b013138231f6af25eefd13f5e3e98 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Thu, 1 Dec 2022 09:07:01 +0800 Subject: [PATCH 076/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index bf5b2ef9a..87d31fb96 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -170,6 +170,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job ContainerPath: "/code", } req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand) + log.Info("debug command:" + req.Command) } From eb4bc466e6736c6fdd4a7591ad5a1f31a83f6bc0 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Thu, 1 Dec 2022 09:31:39 +0800 Subject: [PATCH 077/370] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 87d31fb96..eeed0296a 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -30,7 +30,7 @@ const ( BucketRemote = "grampus" RemoteModelPath = "/output/" + models.ModelSuffix autoStopDurationMs = 4 * 60 * 60 * 1000 - CommandGpuDebug = "%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" + CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" ) var ( From 363dc27fa590665d2f06e7451d7189882de1b457 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 1 Dec 2022 10:27:22 +0800 Subject: [PATCH 078/370] #3169 update --- models/list_options.go | 12 ++++++------ routers/home.go | 14 +++++++++++++- routers/routes/routes.go | 4 +++- services/repository/square.go | 16 ++++++++-------- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/models/list_options.go b/models/list_options.go index 29144ba22..d6d1dcf0d 100644 --- a/models/list_options.go +++ b/models/list_options.go @@ -10,15 +10,15 @@ import ( "xorm.io/xorm" ) -type SelectedPageSize int +type AvailablePageSize int const ( - PageSize15 SelectedPageSize = 15 - PageSize30 SelectedPageSize = 30 - PageSize50 SelectedPageSize = 50 + PageSize15 AvailablePageSize = 15 + PageSize30 AvailablePageSize = 30 + PageSize50 AvailablePageSize = 50 ) -func (s SelectedPageSize) IsLegal() bool { +func (s AvailablePageSize) IsLegal() bool { switch s { case PageSize30, PageSize50, PageSize15: return true @@ -26,7 +26,7 @@ func (s SelectedPageSize) IsLegal() bool { return false } -func (s SelectedPageSize) Int() int { +func (s AvailablePageSize) Int() int { return int(s) } diff --git a/routers/home.go b/routers/home.go index a1043655e..d54a0160f 100755 --- a/routers/home.go +++ b/routers/home.go @@ -43,6 +43,8 @@ const ( tplHomeTerm base.TplName = "terms" tplHomePrivacy base.TplName = "privacy" tplResoruceDesc base.TplName = "resource_desc" + tplRepoSquare base.TplName = "explore/repos/square" + tplRepoSearch base.TplName = "explore/repos/search" ) // Home render home page @@ -296,6 +298,13 @@ func ExploreRepos(ctx *context.Context) { }) } +func GetRepoSquarePage(ctx *context.Context) { + ctx.HTML(200, tplRepoSquare) +} +func GetRepoSearchPage(ctx *context.Context) { + ctx.HTML(200, tplRepoSearch) +} + func RepoSquare(ctx *context.Context) { var result []*models.Repository4Card var err error @@ -351,7 +360,10 @@ func RepoFind(ctx *context.Context) { topic := strings.Trim(ctx.Query("topic"), " ") sort := strings.Trim(ctx.Query("sort"), " ") page := ctx.QueryInt("page") - pageSize := models.SelectedPageSize(ctx.QueryInt("pageSize")) + pageSize := models.AvailablePageSize(ctx.QueryInt("pageSize")) + if pageSize == 0 { + pageSize = models.PageSize15 + } if !pageSize.IsLegal() { ctx.JSON(http.StatusOK, response.ServerError("pageSize illegal")) return diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 8f60153db..8ecca32e2 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -371,8 +371,10 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/images/star", repo.GetStarImages) m.Group("/repos", func() { - m.Get("", routers.ExploreRepos) + //m.Get("", routers.ExploreRepos) + m.Get("", routers.GetRepoSearchPage) m.Group("/square", func() { + m.Get("", routers.GetRepoSquarePage) m.Get("/tab", routers.RepoSquare) m.Get("/active-user", routers.ActiveUser) m.Get("/active-org", routers.ActiveOrg) diff --git a/services/repository/square.go b/services/repository/square.go index 55858db81..7ff36c5af 100644 --- a/services/repository/square.go +++ b/services/repository/square.go @@ -85,31 +85,31 @@ func FindRepos(opts FindReposOptions) (*models.FindReposResponse, error) { switch opts.Sort { //1.近期热门:按最近1个月浏览量倒序排序,最近1个月浏览量>最近更新>项目名称升序 - case "most_popular": + case "mostpopular": orderBy = models.SearchOrderByLastMonthVisitsReverse + "," + models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically //2.近期活跃:按提交增长量(最近4个月commit数)倒序排序,提交增长量>最近更新>项目名称升序。 - case "most_active": + case "mostactive": orderBy = models.SearchOrderByLastFourMonthCommitsReverse + "," + models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically //3.最近更新:按最近更新>项目名称升序排序。 - case "recent_update": + case "recentupdate": orderBy = models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically //4.最近创建:按项目创建时间排序,最近的排前面。最近创建>项目名称升序。 case "newest": orderBy = models.SearchOrderByNewest + "," + models.SearchOrderByAlphabetically //5.点赞最多:按点赞数倒序排序。点赞数>最近更新>项目名称升序。 - case "most_stars": + case "moststars": orderBy = models.SearchOrderByStarsReverse + "," + models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically //6.派生最多:按派生数倒序排序。派生数>最近更新>项目名称升序。 - case "most_forks": + case "mostforks": orderBy = models.SearchOrderByForksReverse + "," + models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically //7.数据集最多:按项目包含的数据集文件数量倒序排序,数据集文件数>最近更新>项目名称升序。 - case "most_datasets": + case "mostdatasets": orderBy = models.SearchOrderByDatasetCntReverse + "," + models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically //8.AI任务最多:按项目包含的AI任务数量倒序排序,AI任务数>最近更新>项目名称升序。 - case "most_ai_tasks": + case "mostaitasks": orderBy = models.SearchOrderByAiTaskCntReverse + "," + models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically //9.模型最多:按项目包含的模型数量倒序排序,模型大小为0则不统计。模型数>最近更新>项目名称升序。 - case "most_models": + case "mostmodels": orderBy = models.SearchOrderByModelCntReverse + "," + models.SearchOrderByRecentUpdated + "," + models.SearchOrderByAlphabetically default: From 0d2435f9871c8734263b0515ecb97d379dab6f7c Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Thu, 1 Dec 2022 16:13:01 +0800 Subject: [PATCH 079/370] fix issue --- templates/custom/select_model.tmpl | 4 ++-- templates/repo/modelarts/trainjob/new.tmpl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/custom/select_model.tmpl b/templates/custom/select_model.tmpl index 81332b873..1cffc86c9 100644 --- a/templates/custom/select_model.tmpl +++ b/templates/custom/select_model.tmpl @@ -1,6 +1,6 @@ -
-   +
+