diff --git a/models/attachment.go b/models/attachment.go index 2b747db21..2788ef559 100755 --- a/models/attachment.go +++ b/models/attachment.go @@ -259,6 +259,17 @@ func GetAttachmentsByCommentID(commentID int64) ([]*Attachment, error) { return getAttachmentsByCommentID(x, commentID) } +func GetAttachmentByDatasetIdFileName(fileName string, datasetId int64) (*Attachment, error) { + attach := &Attachment{DatasetID: datasetId, Name: fileName} + has, err := x.Get(attach) + if err != nil { + return nil, err + } else if !has { + return nil, err + } + return attach, nil +} + func getAttachmentsByCommentID(e Engine, commentID int64) ([]*Attachment, error) { attachments := make([]*Attachment, 0, 10) return attachments, e.Where("comment_id=?", commentID).Find(&attachments) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 37965c73a..85e3c36f8 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -60,6 +60,7 @@ const ( JobTypeModelSafety JobType = "MODELSAFETY" JobTypeSnn4imagenet JobType = "SNN4IMAGENET" JobTypeBrainScore JobType = "BRAINSCORE" + JobTypeSnn4Ecoset JobType = "SNN4ECOSET" JobTypeTrain JobType = "TRAIN" JobTypeInference JobType = "INFERENCE" @@ -205,7 +206,7 @@ type Cloudbrain struct { BenchmarkTypeRankLink string `xorm:"-"` StartTime timeutil.TimeStamp EndTime timeutil.TimeStamp - Cleared bool `xorm:"DEFAULT false"` + Cleared bool `xorm:"DEFAULT false"` Spec *Specification `xorm:"-"` } @@ -335,6 +336,9 @@ func IsModelArtsDebugJobTerminal(status string) bool { func IsCloudBrainOneDebugJobTerminal(status string) bool { return status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded) } +func IsModelBenchMarkJobType(jobType string) bool { + return jobType == string(JobTypeSnn4imagenet) || jobType == string(JobTypeBrainScore) || jobType == string(JobTypeSnn4Ecoset) +} func ParseAndSetDurationFromCloudBrainOne(result JobResultPayload, task *Cloudbrain) { isActivated := result.JobStatus.CreatedTime > 0 @@ 
-449,29 +453,32 @@ type GetImagesPayload struct { type CloudbrainsOptions struct { ListOptions - RepoID int64 // include all repos if empty - UserID int64 - JobID string - SortType string - CloudbrainIDs []int64 - JobStatus []string - JobStatusNot bool - Keyword string - Type int - JobTypes []string - VersionName string - IsLatestVersion string - JobTypeNot bool - NeedRepoInfo bool - RepoIDList []int64 - BeginTime time.Time - EndTime time.Time - ComputeResource string - BeginTimeUnix int64 - EndTimeUnix int64 - AiCenter string - NeedDeleteInfo string - Cluster string + RepoID int64 // include all repos if empty + UserID int64 + JobID string + SortType string + CloudbrainIDs []int64 + JobStatus []string + JobStatusNot bool + Keyword string + Type int + JobTypes []string + VersionName string + IsLatestVersion string + JobTypeNot bool + NeedRepoInfo bool + RepoIDList []int64 + BeginTime time.Time + EndTime time.Time + ComputeResource string + BeginTimeUnix int64 + EndTimeUnix int64 + AiCenter string + NeedDeleteInfo string + Cluster string + AccCardType string + AccCardsNum int + WorkServerNumber int } type TaskPod struct { @@ -1559,7 +1566,8 @@ type CreateGrampusJobResponse struct { type GetGrampusJobResponse struct { GrampusResult - JobInfo GrampusJobInfo `json:"otJob"` + JobInfo GrampusJobInfo `json:"otJob"` + ExitDiagnostics string `json:"exitDiagnostics"` } type GrampusNotebookResponse struct { @@ -1823,7 +1831,7 @@ func QueryModelTrainJobVersionList(jobId string) ([]*Cloudbrain, int, error) { return cloudbrains, int(len(cloudbrains)), nil } -func QueryModelTrainJobList(repoId int64) ([]*CloudbrainInfo, int, error) { +func QueryModelTrainJobList(repoId int64) ([]*Cloudbrain, int, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() @@ -1840,14 +1848,14 @@ func QueryModelTrainJobList(repoId int64) ([]*CloudbrainInfo, int, error) { // builder.In("type", 0, 1), // ) - cloudbrains := make([]*CloudbrainInfo, 0) + cloudbrains := 
make([]*Cloudbrain, 0) if err := sess.Select("job_id,display_job_name").Table(&Cloudbrain{}).Where(cond).OrderBy("created_unix DESC"). Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } keys := make(map[string]string) - uniqueElements := make([]*CloudbrainInfo, 0) + uniqueElements := make([]*Cloudbrain, 0) for _, entry := range cloudbrains { if _, value := keys[entry.JobID]; !value { keys[entry.JobID] = entry.DisplayJobName @@ -1988,7 +1996,7 @@ func GetCloudbrainByID(id string) (*Cloudbrain, error) { return getRepoCloudBrain(cb) } -func IsCloudbrainExistByJobName(jobName string)(bool,error){ +func IsCloudbrainExistByJobName(jobName string) (bool, error) { return x.Unscoped().Exist(&Cloudbrain{ JobName: jobName, }) @@ -2126,6 +2134,15 @@ func GetCloudbrainByName(jobName string) (*Cloudbrain, error) { cb := &Cloudbrain{JobName: jobName} return getRepoCloudBrain(cb) } +func GetWaitOrRunFileNotebookByRepo(repoId int64, cloudbrainType int) (*Cloudbrain, error) { + cloudBrain := new(Cloudbrain) + has, err := x.In("status", JobWaiting, JobRunning, ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, + ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsDeleting, ModelArtsRestarting).Where("repo_id=? and type=? 
and boot_file!=''", repoId, cloudbrainType).Get(cloudBrain) + if has { + return cloudBrain, err + } + return nil, err +} func CanDelJob(isSigned bool, user *User, job *CloudbrainInfo) bool { if !isSigned || (job.Status != string(JobStopped) && job.Status != string(JobFailed) && job.Status != string(ModelArtsStartFailed) && job.Status != string(ModelArtsCreateFailed)) { @@ -2159,7 +2176,7 @@ func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) { Find(&cloudbrains) } -func GetCloudBrainOneStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { +func GetGPUStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) endTimeBefore := time.Now().Unix() - int64(days)*24*3600 missEndTimeBefore := endTimeBefore - 24*3600 @@ -2168,29 +2185,29 @@ func GetCloudBrainOneStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbra JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted, ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed, ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed). - Where("(((end_time is null or end_time=0) and updated_unix and updated_unix != 0 ) or (end_time and end_time != 0)) and cleared=false and type=0 and job_type != 'DEBUG'", missEndTimeBefore, endTimeBefore). + Where("(((end_time is null or end_time=0) and updated_unix and updated_unix != 0 ) or (end_time and end_time != 0)) and cleared=false and (type=0 or (type =2 and compute_resource='CPU/GPU')) and job_type != 'DEBUG'", missEndTimeBefore, endTimeBefore). Limit(limit). 
Find(&cloudbrains) } + /** 本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间 - */ -func GetCloudBrainOneStoppedDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { +*/ +func GetGPUStoppedDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) endTimeBefore := time.Now().Unix() - int64(days)*24*3600 missEndTimeBefore := endTimeBefore - 24*3600 - sql:=`SELECT id,job_name,job_id from (SELECT DISTINCT ON (job_name) + sql := `SELECT id,job_name,job_id from (SELECT DISTINCT ON (job_name) id, job_name, job_id,status,end_time,updated_unix,cleared FROM cloudbrain - where type=0 and job_type='DEBUG' + where (type=0 or (type =2 and compute_resource='CPU/GPU')) and job_type='DEBUG' ORDER BY job_name, updated_unix DESC) a where status in ('STOPPED','SUCCEEDED','FAILED') and (((end_time is null or end_time=0) and updated_unix and updated_unix != 0 ) or (end_time and end_time != 0)) and cleared=false` - return cloudbrains, x.Unscoped().SQL(sql,missEndTimeBefore, endTimeBefore).Limit(limit).Find(&cloudbrains) + return cloudbrains, x.Unscoped().SQL(sql, missEndTimeBefore, endTimeBefore).Limit(limit).Find(&cloudbrains) } - func UpdateCloudBrainRecordsCleared(ids []int64) error { pageSize := 150 n := len(ids) / pageSize @@ -2422,18 +2439,44 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { ) } + if opts.WorkServerNumber > 0 { + if opts.WorkServerNumber == 1 { + cond = cond.And(builder.Or( + builder.Eq{"cloudbrain.work_server_number": 0}, + builder.Eq{"cloudbrain.work_server_number": 1}, + builder.IsNull{"cloudbrain.work_server_number"}, + )) + } else { + cond = cond.And( + builder.Eq{"cloudbrain.work_server_number": opts.WorkServerNumber}, + ) + } + } + + if opts.AccCardType != "" { + cond = cond.And(builder.Eq{"cloudbrain_spec.acc_card_type": opts.AccCardType}) + } + if opts.AccCardsNum >= 0 { + cond = cond.And(builder.Eq{"cloudbrain_spec.acc_cards_num": opts.AccCardsNum}) + } + var count int64 var err error 
condition := "cloudbrain.user_id = `user`.id" if len(opts.Keyword) == 0 { - count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain)) + count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). + Join("left", "`user`", condition). + Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). + Count(new(CloudbrainInfo)) } else { lowerKeyWord := strings.ToLower(opts.Keyword) cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord})) count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). - Join("left", "`user`", condition).Count(new(CloudbrainInfo)) + Join("left", "`user`", condition). + Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). + Count(new(CloudbrainInfo)) } @@ -2455,6 +2498,7 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond). Join("left", "`user`", condition). + Join("left", "cloudbrain_spec", "cloudbrain.id = cloudbrain_spec.cloudbrain_id"). 
Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } @@ -2609,6 +2653,7 @@ type DatasetInfo struct { DataLocalPath string Name string FullName string + Size int } func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { @@ -2664,6 +2709,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn DataLocalPath: dataLocalPath, Name: fileName, FullName: attach.Name, + Size: int(attach.Size), } if i == 0 { datasetNames = attach.Name diff --git a/models/user.go b/models/user.go index dad252d92..d34860f2d 100755 --- a/models/user.go +++ b/models/user.go @@ -16,6 +16,7 @@ import ( "fmt" _ "image/jpeg" // Needed for jpeg support "image/png" + "math/rand" "os" "path/filepath" "regexp" @@ -495,7 +496,11 @@ func (u *User) RealSizedAvatarLink(size int) string { // may either be a sub-URL to this site, or a full URL to an external avatar // service. func (u *User) RelAvatarLink() string { - return u.SizedRelAvatarLink(base.DefaultAvatarSize) + append := "" + if u.UseCustomAvatar { + append = "?" + fmt.Sprint(rand.Intn(100)) + } + return u.SizedRelAvatarLink(base.DefaultAvatarSize) + append } // AvatarLink returns user avatar absolute link. 
diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 0061648ce..874bbb0a4 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -28,6 +28,7 @@ type CreateModelArtsNotebookForm struct { LabelName string `form:"label_names"` PreTrainModelUrl string `form:"pre_train_model_url"` SpecId int64 `form:"spec_id" binding:"Required"` + DatasetName string `form:"dataset_name"` } func (f *CreateModelArtsNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index c85f4b8cd..1929c58a6 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -32,10 +32,10 @@ const ( Snn4imagenetMountPath = "/snn4imagenet" BrainScoreMountPath = "/brainscore" TaskInfoName = "/taskInfo" - Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s' >/model/benchmark-log.txt` - BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s' >/model/benchmark-log.txt` - - SubTaskName = "task1" + Snn4imagenetCommand = `/opt/conda/bin/python /benchmark/testSNN_script.py --modelname '%s' --modelpath '/pretrainmodel/%s' --modeldescription '%s' >/model/benchmark-log.txt` + BrainScoreCommand = `bash /benchmark/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/pretrainmodel/%s' -d '%s' >/model/benchmark-log.txt` + Snn4EcosetCommand = `/opt/conda/bin/python /benchmark/testSNN_script.py --datapath '/dataset' --modelname '%s' --modelpath '/pretrainmodel/%s' --modeldescription '%s' >/model/benchmark-log.txt` + SubTaskName = "task1" Success = "S000" @@ -259,20 +259,6 @@ func GenerateTask(req GenerateCloudBrainTaskReq) (string, error) { }, { HostPath: models.StHostPath{ - Path: req.Snn4ImageNetPath, - MountPath: Snn4imagenetMountPath, - ReadOnly: true, - }, - }, - { - HostPath: models.StHostPath{ - Path: 
req.BrainScorePath, - MountPath: BrainScoreMountPath, - ReadOnly: true, - }, - }, - { - HostPath: models.StHostPath{ Path: req.ResultPath, MountPath: ResultPath, ReadOnly: false, @@ -406,7 +392,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) (string, error) { } func IsBenchmarkJob(jobType string) bool { - return string(models.JobTypeModelSafety) == jobType || string(models.JobTypeBenchmark) == jobType || string(models.JobTypeBrainScore) == jobType || string(models.JobTypeSnn4imagenet) == jobType + return string(models.JobTypeModelSafety) == jobType || string(models.JobTypeBenchmark) == jobType || string(models.JobTypeBrainScore) == jobType || string(models.JobTypeSnn4imagenet) == jobType || string(models.JobTypeSnn4Ecoset) == jobType } func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTypes ...models.JobType) int64 { @@ -675,6 +661,7 @@ func IsElementExist(s []string, str string) bool { return false } + func GetCloudBrainByIdOrJobId(id string, initialQuery string) (*models.Cloudbrain, error) { _, err := strconv.ParseInt(id, 10, 64) var job *models.Cloudbrain @@ -709,6 +696,7 @@ type GenerateModelArtsNotebookReq struct { ImageId string AutoStopDurationMs int64 + BranchName string Spec *models.Specification ModelName string diff --git a/modules/convert/cloudbrain.go b/modules/convert/cloudbrain.go index 599da4800..73e37b1ea 100644 --- a/modules/convert/cloudbrain.go +++ b/modules/convert/cloudbrain.go @@ -28,14 +28,13 @@ func ToCloudBrain(task *models.Cloudbrain) *api.Cloudbrain { BootFile: task.BootFile, Description: task.Description, ModelName: task.ModelName, - - ModelVersion: task.ModelVersion, - CkptName: task.CkptName, + VersionName: task.VersionName, + ModelVersion: task.ModelVersion, + CkptName: task.CkptName, StartTime: int64(task.StartTime), EndTime: int64(task.EndTime), - - Spec: ToSpecification(task.Spec), + Spec: ToSpecification(task.Spec), } } func ToAttachment(attachment *models.Attachment) *api.AttachmentShow { @@ 
-89,6 +88,9 @@ func ToDataset(dataset *models.Dataset) *api.Dataset { } func ToSpecification(s *models.Specification) *api.SpecificationShow { + if s == nil { + return nil + } return &api.SpecificationShow{ ID: s.ID, AccCardsNum: s.AccCardsNum, diff --git a/modules/grampus/resty.go b/modules/grampus/resty.go index a0d5384e2..3611240b9 100755 --- a/modules/grampus/resty.go +++ b/modules/grampus/resty.go @@ -198,7 +198,6 @@ sendjob: SetAuthToken(TOKEN). SetResult(&result). Get(HOST + urlTrainJob + "/" + jobID) - if err != nil { return nil, fmt.Errorf("resty GetJob: %v", err) } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index dcad1eb00..7f67e36f4 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -1,13 +1,9 @@ package modelarts import ( - "encoding/base64" "encoding/json" "errors" "fmt" - "io/ioutil" - "net/http" - "path" "strconv" "strings" @@ -26,9 +22,9 @@ import ( const ( //notebook - storageTypeOBS = "obs" - autoStopDuration = 4 * 60 * 60 - AutoStopDurationMs = 4 * 60 * 60 * 1000 + storageTypeOBS = "obs" + autoStopDuration = 4 * 60 * 60 + AutoStopDurationMs = 4 * 60 * 60 * 1000 CodePath = "/code/" OutputPath = "/output/" @@ -172,7 +168,6 @@ type OrgMultiNode struct { Node []int `json:"node"` } - type Parameters struct { Parameter []struct { Label string `json:"label"` @@ -239,6 +234,7 @@ func GenerateNotebook2(ctx *context.Context, req cloudbrain.GenerateModelArtsNot ComputeResource: models.NPUResource, Image: imageName, BootFile: req.BootFile, + BranchName: req.BranchName, Description: req.Description, CreatedUnix: createTime, UpdatedUnix: createTime, @@ -830,10 +826,6 @@ func HandleNotebookInfo(task *models.Cloudbrain) error { task.FlavorCode = result.Flavor } - if oldStatus != task.Status && task.Status == string(models.ModelArtsRunning) && task.BootFile != "" { - uploadNoteBookFile(task, result) - - } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", 
task.DisplayJobName, err) @@ -844,81 +836,6 @@ func HandleNotebookInfo(task *models.Cloudbrain) error { return nil } -func uploadNoteBookFile(task *models.Cloudbrain, result *models.GetNotebook2Result) { - jupyterUrl := result.Url + "?token=" + result.Token - - cookies, xsrf := getCookiesAndCsrf(jupyterUrl) - if xsrf == "" { - log.Error("browser jupyterUrl failed:%v", task.DisplayJobName) - } else { - - codePath := setting.JobPath + task.JobName + cloudbrain.CodeMountPath - fileContents, err := ioutil.ReadFile(codePath + "/" + task.BootFile) - if err != nil { - log.Error("read jupyter file failed:%v", task.DisplayJobName, err) - } - - base64Content := base64.StdEncoding.EncodeToString(fileContents) - client := getRestyClient() - uploadUrl := getJupyterBaseUrl(result.Url) + "api/contents/" + path.Base(task.BootFile) - res, err := client.R(). - SetCookies(cookies). - SetHeader("X-XSRFToken", xsrf). - SetBody(map[string]interface{}{ - "type": "file", - "format": "base64", - "name": path.Base(task.BootFile), - "path": path.Base(task.BootFile), - "content": base64Content}). 
- Put(uploadUrl) - if err != nil { - log.Error("upload jupyter file failed:%v", task.DisplayJobName, err) - } else if res.StatusCode() != http.StatusCreated { - log.Error("upload jupyter file failed:%v", task.DisplayJobName, err) - } - - } - -} - -func getJupyterBaseUrl(url string) string { - jupyterUrlLength := len(url) - baseUrl := url[0 : jupyterUrlLength-len(path.Base(url))] - return baseUrl -} - -func getCookiesAndCsrf(jupyterUrl string) ([]*http.Cookie, string) { - log.Info("jupyter url:" + jupyterUrl) - var cookies []*http.Cookie - const retryTimes = 10 - for i := 0; i < retryTimes; i++ { - res, err := http.Get(jupyterUrl) - if err != nil { - log.Error("browser jupyterUrl failed.", err) - if i == retryTimes-1 { - return cookies, "" - } - - } else { - cookies = res.Cookies() - xsrf := "" - for _, cookie := range cookies { - if cookie.Name == "_xsrf" { - xsrf = cookie.Value - break - } - - } - if xsrf != "" { - return cookies, xsrf - } - - } - } - return cookies, "" - -} - func SyncTempStatusJob() { jobs, err := models.GetCloudBrainTempJobs() if err != nil { diff --git a/modules/modelarts_cd/modelarts.go b/modules/modelarts_cd/modelarts.go index bdc42002a..9d74c0919 100755 --- a/modules/modelarts_cd/modelarts.go +++ b/modules/modelarts_cd/modelarts.go @@ -148,6 +148,7 @@ func GenerateNotebook(ctx *context.Context, req cloudbrain.GenerateModelArtsNote UpdatedUnix: createTime, Spec: req.Spec, BootFile: req.BootFile, + BranchName: req.BranchName, ModelName: req.ModelName, ModelVersion: req.ModelVersion, LabelName: req.LabelName, diff --git a/modules/notebook/contentManager.go b/modules/notebook/contentManager.go new file mode 100644 index 000000000..823e59665 --- /dev/null +++ b/modules/notebook/contentManager.go @@ -0,0 +1,198 @@ +package notebook + +import ( + "crypto/tls" + "encoding/base64" + "fmt" + "io/ioutil" + "net/http" + "path" + "strings" + + "github.com/go-resty/resty/v2" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/cloudbrain" + 
"code.gitea.io/gitea/modules/setting" + + "code.gitea.io/gitea/modules/log" +) + +var restyClient *resty.Client + +type NotebookApiResponse struct { + Name string `json:"name"` + Path string `json:"path"` +} + +type NotebookContent struct { + Url string + Path string + Cookies []*http.Cookie + Xsrf string + PathType string //file directory + Token string +} + +func (c *NotebookContent) IsNotebookFileCanBrowser() bool { + if c.Xsrf == "" { + c.SetCookiesAndCsrf() + } + if c.Xsrf == "" { + log.Warn("xsrf is empty, can not broswer url:" + c.Url) + return false + } + return c.IsNoteBookContentsExist() + +} + +func (c *NotebookContent) SetCookiesAndCsrf() { + log.Info("jupyter url:" + c.Url) + var cookies []*http.Cookie + const retryTimes = 10 + url := c.Url + if c.Token != "" { + url = c.Url + "?token=" + c.Token + } + for i := 0; i < retryTimes; i++ { + res, err := http.Get(url) + if err != nil { + log.Error("browser jupyterUrl failed.", err) + if i == retryTimes-1 { + c.Cookies = cookies + } + + } else { + cookies = res.Cookies() + xsrf := "" + for _, cookie := range cookies { + if cookie.Name == "_xsrf" { + xsrf = cookie.Value + if len(cookies) > 1 { + break + } + + } + + } + if xsrf != "" { + c.Cookies = cookies + c.Xsrf = xsrf + } + + } + } + c.Cookies = cookies + +} + +func (c *NotebookContent) IsNoteBookContentsExist() bool { + client := getRestyClient() + uploadUrl := getJupyterBaseUrl(c.Url) + "api/contents/" + c.Path + "?type=" + c.PathType + res, err := client.R(). + SetCookies(c.Cookies). + SetHeader("X-XSRFToken", c.Xsrf). 
+ Get(uploadUrl) + if err != nil { + log.Warn("browser url error:"+uploadUrl, err) + return false + } + return res.StatusCode() == http.StatusOK +} + +func (c *NotebookContent) UploadNoteBookFile(task *models.Cloudbrain) error { + + err := c.MakeNoteBookDir() + if err != nil { + return err + } + + codePath := setting.JobPath + task.JobName + cloudbrain.CodeMountPath + fileContents, err := ioutil.ReadFile(codePath + "/" + c.Path) + if err != nil { + log.Error("read jupyter file failed:%v", task.DisplayJobName, err) + } + + base64Content := base64.StdEncoding.EncodeToString(fileContents) + client := getRestyClient() + uploadUrl := getJupyterBaseUrl(c.Url) + "api/contents/" + c.Path + res, err := client.R(). + SetCookies(c.Cookies). + SetHeader("X-XSRFToken", c.Xsrf). + SetBody(map[string]interface{}{ + "type": "file", + "format": "base64", + "name": path.Base(c.Path), + "path": c.Path, + "content": base64Content}). + Put(uploadUrl) + if err != nil { + log.Error("upload jupyter file failed:%v", task.DisplayJobName, err) + return err + } else if res.StatusCode() != http.StatusCreated { + log.Error("upload jupyter file failed:%v, status is %s", task.DisplayJobName, res.Status(), err) + return fmt.Errorf("status:", res.StatusCode()) + } + return nil +} + +/** + if c.Path is a/b/c.txt + makedir a/b + if c.Path is a/b/c + makedir a/b +*/ +func (c *NotebookContent) MakeNoteBookDir() error { + filePaths := strings.Split(c.Path, "/") + + for i := 0; i < len(filePaths)-1; i++ { + cTemp := &NotebookContent{ + Url: c.Url, + Cookies: c.Cookies, + Path: path.Join(filePaths[0 : i+1]...), + PathType: "directory", + Xsrf: c.Xsrf, + } + if !cTemp.IsNoteBookContentsExist() { + + createTempDirUrl := getJupyterBaseUrl(cTemp.Url) + "api/contents/" + cTemp.Path + client := getRestyClient() + var jobResult NotebookApiResponse + res, err := client.R(). + SetCookies(c.Cookies). + SetHeader("X-XSRFToken", c.Xsrf). 
+ SetBody(map[string]interface{}{ + "type": cTemp.PathType, + "path": cTemp.Path, + }).SetResult(&jobResult). + Put(createTempDirUrl) + if err != nil { + return err + } + if res.StatusCode() != http.StatusCreated { + return fmt.Errorf("status code:" + res.Status()) + } + + } + + } + return nil +} + +func getJupyterBaseUrl(url string) string { + jupyterUrlLength := len(url) + baseUrl := url + if strings.HasSuffix(url, "lab") { + baseUrl = url[0 : jupyterUrlLength-len(path.Base(url))] + } + + return baseUrl +} + +func getRestyClient() *resty.Client { + if restyClient == nil { + restyClient = resty.New() + restyClient.SetTLSClientConfig(&tls.Config{InsecureSkipVerify: true}) + } + return restyClient +} diff --git a/modules/repository/repo.go b/modules/repository/repo.go index 3268cce60..43427893f 100644 --- a/modules/repository/repo.go +++ b/modules/repository/repo.go @@ -139,6 +139,9 @@ func MigrateRepositoryGitData(doer, u *models.User, repo *models.Repository, opt } repo.IsMirror = true + if repo.Description == "" { + repo.Description = opts.Description + } err = models.UpdateRepository(repo, false) } else { repo, err = CleanUpMigrateInfo(repo) diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 43a841f18..09e7259f2 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -518,6 +518,7 @@ var ( MaxDatasetNum int CullIdleTimeout string CullInterval string + DebugAttachSize int //benchmark config IsBenchmarkEnabled bool @@ -543,6 +544,12 @@ var ( BrainScoreName string BrainScoreServerHost string + IsSnn4EcosetEnabled bool + Snn4EcosetOwner string + Snn4EcosetName string + Snn4EcosetServerHost string + Snn4AttachmentName string + //blockchain config BlockChainHost string CommitValidDate string @@ -1497,6 +1504,7 @@ func NewContext() { MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5) CullIdleTimeout = sec.Key("CULL_IDLE_TIMEOUT").MustString("900") CullInterval = sec.Key("CULL_INTERVAL").MustString("60") + DebugAttachSize = 
sec.Key("DEBUG_ATTACH_SIZE").MustInt(20) sec = Cfg.Section("benchmark") IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) @@ -1522,6 +1530,13 @@ func NewContext() { BrainScoreName = sec.Key("NAME").MustString("") BrainScoreServerHost = sec.Key("HOST").MustString("") + sec = Cfg.Section("snn4ecoset") + IsSnn4EcosetEnabled = sec.Key("ENABLED").MustBool(false) + Snn4EcosetOwner = sec.Key("OWNER").MustString("") + Snn4EcosetName = sec.Key("NAME").MustString("") + Snn4EcosetServerHost = sec.Key("HOST").MustString("") + Snn4AttachmentName = sec.Key("DATASET").MustString("") + sec = Cfg.Section("blockchain") BlockChainHost = sec.Key("HOST").MustString("http://192.168.136.66:3302/") CommitValidDate = sec.Key("COMMIT_VALID_DATE").MustString("2021-01-15") diff --git a/modules/structs/cloudbrain.go b/modules/structs/cloudbrain.go index 9ea5601c9..cba297dc0 100644 --- a/modules/structs/cloudbrain.go +++ b/modules/structs/cloudbrain.go @@ -47,36 +47,37 @@ type CreateFileNotebookJobOption struct { BranchName string `json:"branch_name" binding:"Required"` OwnerName string `json:"owner_name" binding:"Required"` ProjectName string `json:"project_name" binding:"Required"` + JobId string `json:"job_id"` } type Cloudbrain struct { - ID int64 `json:"id"` - JobID string `json:"job_id"` - JobType string `json:"job_type"` - Type int `json:"type"` - DisplayJobName string `json:"display_job_name"` - Status string `json:"status"` - CreatedUnix int64 `json:"created_unix"` - RepoID int64 `json:"repo_id"` - Duration int64 `json:"duration"` //运行时长 单位秒 - TrainJobDuration string `json:"train_job_duration"` - ImageID string `json:"image_id"` //grampus image_id - Image string `json:"image"` - Uuid string `json:"uuid"` //数据集id - DatasetName string `json:"dataset_name"` - ComputeResource string `json:"compute_resource"` //计算资源,例如npu - AiCenter string `json:"ai_center"` //grampus ai center: center_id+center_name - BranchName string `json:"branch_name"` //分支名称 - Parameters string 
`json:"parameters"` //传给modelarts的param参数 - BootFile string `json:"boot_file"` //启动文件 - Description string `json:"description"` //描述 - ModelName string `json:"model_name"` //模型名称 - ModelVersion string `json:"model_version"` //模型版本 - CkptName string `json:"ckpt_name"` //权重文件名称 - StartTime int64 `json:"start_time"` - EndTime int64 `json:"end_time"` - - Spec *SpecificationShow `json:"spec"` + ID int64 `json:"id"` + JobID string `json:"job_id"` + JobType string `json:"job_type"` + Type int `json:"type"` + DisplayJobName string `json:"display_job_name"` + Status string `json:"status"` + CreatedUnix int64 `json:"created_unix"` + RepoID int64 `json:"repo_id"` + Duration int64 `json:"duration"` //运行时长 单位秒 + TrainJobDuration string `json:"train_job_duration"` + ImageID string `json:"image_id"` //grampus image_id + Image string `json:"image"` + Uuid string `json:"uuid"` //数据集id + DatasetName string `json:"dataset_name"` + ComputeResource string `json:"compute_resource"` //计算资源,例如npu + AiCenter string `json:"ai_center"` //grampus ai center: center_id+center_name + BranchName string `json:"branch_name"` //分支名称 + Parameters string `json:"parameters"` //传给modelarts的param参数 + BootFile string `json:"boot_file"` //启动文件 + Description string `json:"description"` //描述 + ModelName string `json:"model_name"` //模型名称 + ModelVersion string `json:"model_version"` //模型版本 + CkptName string `json:"ckpt_name"` //权重文件名称 + StartTime int64 `json:"start_time"` + EndTime int64 `json:"end_time"` + VersionName string `json:"version_name"` + Spec *SpecificationShow `json:"spec"` } type SpecificationShow struct { diff --git a/modules/templates/helper.go b/modules/templates/helper.go index 9d31952d6..88c63c8c7 100755 --- a/modules/templates/helper.go +++ b/modules/templates/helper.go @@ -98,11 +98,15 @@ func NewFuncMap() []template.FuncMap { "AllowedReactions": func() []string { return setting.UI.Reactions }, + "DebugAttachSize": func() int { + return setting.DebugAttachSize * 1000 * 1000 * 1000 + }, 
"AvatarLink": models.AvatarLink, "Safe": Safe, "SafeJS": SafeJS, "Str2html": Str2html, "subOne": subOne, + "addOne": addOne, "TimeSince": timeutil.TimeSince, "TimeSinceUnix": timeutil.TimeSinceUnix, "TimeSinceUnix1": timeutil.TimeSinceUnix1, @@ -153,7 +157,7 @@ func NewFuncMap() []template.FuncMap { "EscapePound": func(str string) string { return strings.NewReplacer("%", "%25", "#", "%23", " ", "%20", "?", "%3F").Replace(str) }, - "IpynbBool":func(str string) bool{ + "IpynbBool": func(str string) bool { return strings.Contains(str, ".ipynb") }, "nl2br": func(text string) template.HTML { @@ -470,7 +474,9 @@ func Str2html(raw string) template.HTML { func subOne(length int) int { return length - 1 } - +func addOne(length int64) int64 { + return length + 1 +} // Escape escapes a HTML string func Escape(raw string) string { return html.EscapeString(raw) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index fcb20597c..13c9868b5 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -579,6 +579,7 @@ static.CloudBrainTaskNum=CloudBrain Task Count static.CloudBrainRunTime=CloudBrain Run Time static.CommitDatasetNum=Commit Dataset Count static.CommitModelCount=Commit Model Count +static.ModelConvertCount=Model Convert Count static.UserIndex=Normalized user index static.UserIndexPrimitive=User Index static.countdate=Count Date @@ -1063,7 +1064,9 @@ model_rename=Duplicate model name, please modify model name. notebook_file_not_exist=Notebook file does not exist. notebook_select_wrong=Please select a Notebook(.ipynb) file first. notebook_file_no_right=You have no right to access the Notebook(.ipynb) file. +notebook_repo_conflict=The files in different branches of the same repository can not run together. debug_again_fail=Fail to restart debug task, please try again later. +debug_again_fail_forever=The task was scheduled failed last time, can not restart. 
date=Date repo_add=Project Increment @@ -1081,6 +1084,7 @@ delete=Delete more=More gpu_type_all=All model_download=Model Download +all_result_download=All result download submit_image=Submit Image modify_image=Modify Image image_exist=Image name has been used, please use a new one. @@ -1093,8 +1097,8 @@ image_delete_fail=Failed to delete image, please try again later. image_overwrite=You had submitted the same name image before, are you sure to overwrite the original image? download=Download score=Score -wait_count_start = There are currently -wait_count_end = tasks queued +wait_count_start = Your current queue position is +wait_count_end = file_limit_100 = Display up to 100 files or folders in a single directory images.name = Image Tag images.name_placerholder = Please enter the image name @@ -1259,6 +1263,13 @@ modelarts.fullscreen_log_file = View in full screen modelarts.exit_full_screen = Exit fullscreen modelarts.no_node_right = The value of 'Amount of Compute Node' is wrong, you have no right to use the current value of 'Amount of Compute Node'. +scrolled_logs_top = You have scrolled to the top of the log +scrolled_logs_top_pls_retry = You have scrolled to the top of the log, please try again later! +scrolled_logs_bottom = You have scrolled to the bottom of the log +scrolled_logs_bottom_pls_retry = You have scrolled to the bottom of the log, please try again later! 
+ +canceled_operation = You have canceled the operation +successfully_deleted = Successfully deleted debug_task_not_created = Debug task has not been created train_task_not_created = Train task has not been created diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 86d445871..6c0b9e4ac 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -583,6 +583,7 @@ static.CloudBrainTaskNum=云脑任务数 static.CloudBrainRunTime=云脑运行时间(小时) static.CommitDatasetNum=上传(提交)数据集文件数 static.CommitModelCount=提交模型数 +static.ModelConvertCount=模型转换数 static.UserIndex=归一化用户指数 static.UserIndexPrimitive=用户指数 static.countdate=系统统计时间 @@ -1062,7 +1063,9 @@ model_rename=模型名称重复,请修改模型名称 notebook_file_not_exist=Notebook文件不存在。 notebook_select_wrong=请先选择Notebook(.ipynb)文件。 notebook_file_no_right=您没有这个Notebook文件的读权限。 +notebook_repo_conflict=同一个仓库的不同分支文件不能同时运行。 debug_again_fail=再次调试失败,请稍后再试。 +debug_again_fail_forever=这个任务之前没有调度成功,不能再次调试。 date=日期 repo_add=新增项目 @@ -1080,6 +1083,7 @@ delete=删除 more=更多 gpu_type_all=全部 model_download=结果下载 +all_result_download=全部结果下载 submit_image=提交镜像 modify_image=修改镜像 image_exist=镜像Tag已被使用,请修改镜像Tag。 @@ -1092,8 +1096,8 @@ image_delete_fail=删除镜像失败,请稍后再试。 image_overwrite=您已经提交过相同名称的镜像,您确定要覆盖原来提交的镜像吗? download=模型下载 score=评分 -wait_count_start = 当前有 -wait_count_end = 个任务正在排队 +wait_count_start = 您当前排队位置是第 +wait_count_end = 位 file_limit_100 = 单目录下最多显示100个文件或文件夹 images.name = 镜像Tag images.name_placerholder = 请输入镜像Tag @@ -1271,6 +1275,13 @@ modelarts.fullscreen_log_file=全屏查看 modelarts.exit_full_screen=退出全屏 modelarts.no_node_right = 计算节点数的值配置错误,您没有权限使用当前配置的计算节点数。 +scrolled_logs_top = 您已翻阅至日志顶部 +scrolled_logs_top_pls_retry = 您已翻阅至日志顶部,请稍后再试! +scrolled_logs_bottom = 您已翻阅至日志底部 +scrolled_logs_bottom_pls_retry = 您已翻阅至日志底部,请稍后再试! 
+ +canceled_operation = 您已取消操作 +successfully_deleted = 删除成功 debug_task_not_created = 未创建过调试任务 train_task_not_created = 未创建过训练任务 @@ -1674,7 +1685,7 @@ issues.action_assignee_no_select=未指派 issues.opened_by=由 %[3]s 于 %[1]s创建 pulls.merged_by=由 %[3]s 于 %[1]s 合并 pulls.merged_by_fake=由 %[2]s 于 %[1]s 合并 -issues.closed_by=按 %[3]s 关闭%[1]s +issues.closed_by=由 %[3]s 创建,被关闭于 %[1]s issues.opened_by_fake=由 %[2]s 于 %[1]s创建 issues.closed_by_fake=通过 %[2]s 关闭 %[1]s issues.previous=上一页 @@ -3327,7 +3338,7 @@ Stopped_failed=任务停止失败,请稍后再试。 Stopped_success_update_status_fail=任务停止成功,状态及运行时间更新失败。 load_code_failed=代码加载失败,请确认选择了正确的分支。 - +error.debug_datasetsize = 数据集大小超过限制('%d'GB) error.dataset_select = 数据集选择错误:数量超过限制或者有同名数据集 new_train_gpu_tooltips = 训练脚本存储在 %s 中,数据集存储在 %s 中,预训练模型存放在运行参数 %s 中,训练输出请存储在 %s 中以供后续下载。 new_debug_gpu_tooltips = 项目代码存储在 %s 中,数据集存储在 %s 中,选择的模型存储在 %s 中,调试输出请存储在 %s 中以供后续下载。 diff --git a/package-lock.json b/package-lock.json index 7dc42d04c..ee922450a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1982,28 +1982,6 @@ "object.assign": "^4.1.0" } }, - "babel-polyfill": { - "version": "6.26.0", - "resolved": "https://registry.npm.taobao.org/babel-polyfill/download/babel-polyfill-6.26.0.tgz", - "integrity": "sha1-N5k3q8Z9eJWXCtxiHyhM2WbPIVM=", - "requires": { - "babel-runtime": "^6.26.0", - "core-js": "^2.5.0", - "regenerator-runtime": "^0.10.5" - }, - "dependencies": { - "core-js": { - "version": "2.6.12", - "resolved": "https://registry.npm.taobao.org/core-js/download/core-js-2.6.12.tgz?cache=0&sync_timestamp=1611040749668&other_urls=https%3A%2F%2Fregistry.npm.taobao.org%2Fcore-js%2Fdownload%2Fcore-js-2.6.12.tgz", - "integrity": "sha1-2TM9+nsGXjR8xWgiGdb2kIWcwuw=" - }, - "regenerator-runtime": { - "version": "0.10.5", - "resolved": "https://registry.npm.taobao.org/regenerator-runtime/download/regenerator-runtime-0.10.5.tgz", - "integrity": "sha1-M2w+/BIgrc7dosn6tntaeVWjNlg=" - } - } - }, "babel-runtime": { "version": "6.26.0", "resolved": 
"https://registry.npmjs.org/babel-runtime/-/babel-runtime-6.26.0.tgz", @@ -2194,9 +2172,9 @@ "integrity": "sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==" }, "blueimp-md5": { - "version": "2.18.0", - "resolved": "https://registry.npm.taobao.org/blueimp-md5/download/blueimp-md5-2.18.0.tgz", - "integrity": "sha1-EVK+EzXwxrORHtnjbbVPPmrFKTU=" + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/blueimp-md5/-/blueimp-md5-2.19.0.tgz", + "integrity": "sha512-DRQrD6gJyy8FbiE4s+bDoXS9hiW3Vbx5uCdwvcCf3zLHL+Iv7LtGHLpr+GZV8rHG8tK766FGYBwRbu8pELTt+w==" }, "bn.js": { "version": "5.1.1", @@ -3536,6 +3514,11 @@ "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-2.2.0.tgz", "integrity": "sha1-QGXiATz5+5Ft39gu+1Bq1MZ2kGI=" }, + "dayjs": { + "version": "1.10.7", + "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.10.7.tgz", + "integrity": "sha512-P6twpd70BcPK34K26uJ1KT3wlhpuOAPoMwJzpsIWUxHZ7wpmbdZL/hQqBDfz7hGurYSa5PhzdhDHtt319hL3ig==" + }, "de-indent": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/de-indent/-/de-indent-1.0.2.tgz", @@ -4152,25 +4135,29 @@ "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=" }, "esdk-obs-browserjs": { - "version": "3.20.7", - "resolved": "https://registry.npm.taobao.org/esdk-obs-browserjs/download/esdk-obs-browserjs-3.20.7.tgz", - "integrity": "sha1-vhziRlKEhW3PgZPl0DyX68bJI0s=", + "version": "3.22.3", + "resolved": "https://registry.npmjs.org/esdk-obs-browserjs/-/esdk-obs-browserjs-3.22.3.tgz", + "integrity": "sha512-MATZXp0FwjPtKG9tpdfURa3koUarR/ev+tbO0oUKgj0GRt0798ZxmfCvYvRpgNst4w1ht4E79ikD4H40UYLgPA==", "requires": { - "axios": "^0.19.0", - "babel-polyfill": "^6.26.0", - "blueimp-md5": "^2.10.0", - "js-base64": "^2.3.2", - "jssha": "^2.3.1", - "urijs": "^1.19.1" + "axios": "^0.26.1", + "blueimp-md5": "^2.18.0", + "js-base64": "^3.7.1", + "jssha": "^3.2.0", + "urijs": "^1.19.7" }, "dependencies": { "axios": { - "version": "0.19.2", - "resolved": 
"https://registry.npm.taobao.org/axios/download/axios-0.19.2.tgz?cache=0&sync_timestamp=1608609215811&other_urls=https%3A%2F%2Fregistry.npm.taobao.org%2Faxios%2Fdownload%2Faxios-0.19.2.tgz", - "integrity": "sha1-PqNsXYgY0NX4qKl6bTa4bNwAyyc=", + "version": "0.26.1", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", + "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", "requires": { - "follow-redirects": "1.5.10" + "follow-redirects": "^1.14.8" } + }, + "js-base64": { + "version": "3.7.3", + "resolved": "https://registry.npmjs.org/js-base64/-/js-base64-3.7.3.tgz", + "integrity": "sha512-PAr6Xg2jvd7MCR6Ld9Jg3BmTcjYsHEBx1VlwEwULb/qowPf5VD9kEMagj23Gm7JRnSvE/Da/57nChZjnvL8v6A==" } } }, @@ -5382,27 +5369,9 @@ } }, "follow-redirects": { - "version": "1.5.10", - "resolved": "https://registry.npm.taobao.org/follow-redirects/download/follow-redirects-1.5.10.tgz?cache=0&sync_timestamp=1611606737937&other_urls=https%3A%2F%2Fregistry.npm.taobao.org%2Ffollow-redirects%2Fdownload%2Ffollow-redirects-1.5.10.tgz", - "integrity": "sha1-e3qfmuov3/NnhqlP9kPtB/T/Xio=", - "requires": { - "debug": "=3.1.0" - }, - "dependencies": { - "debug": { - "version": "3.1.0", - "resolved": "https://registry.npm.taobao.org/debug/download/debug-3.1.0.tgz?cache=0&sync_timestamp=1607566533140&other_urls=https%3A%2F%2Fregistry.npm.taobao.org%2Fdebug%2Fdownload%2Fdebug-3.1.0.tgz", - "integrity": "sha1-W7WgZyYotkFJVmuhaBnmFRjGcmE=", - "requires": { - "ms": "2.0.0" - } - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - } - } + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", + "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==" }, "fomantic-ui": { "version": "2.8.4", @@ -7884,9 +7853,9 @@ } }, "jssha": { - 
"version": "2.4.2", - "resolved": "https://registry.npm.taobao.org/jssha/download/jssha-2.4.2.tgz", - "integrity": "sha1-2VCwlWNJKL1rK9odQtqaOnYtZek=" + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/jssha/-/jssha-3.3.0.tgz", + "integrity": "sha512-w9OtT4ALL+fbbwG3gw7erAO0jvS5nfvrukGPMWIAoea359B26ALXGpzy4YJSp9yGnpUvuvOw1nSjSoHDfWSr1w==" }, "just-debounce": { "version": "1.0.0", @@ -14400,9 +14369,9 @@ } }, "urijs": { - "version": "1.19.6", - "resolved": "https://registry.npmjs.org/urijs/-/urijs-1.19.6.tgz", - "integrity": "sha512-eSXsXZ2jLvGWeLYlQA3Gh36BcjF+0amo92+wHPyN1mdR8Nxf75fuEuYTd9c0a+m/vhCjRK0ESlE9YNLW+E1VEw==" + "version": "1.19.11", + "resolved": "https://registry.npmjs.org/urijs/-/urijs-1.19.11.tgz", + "integrity": "sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==" }, "urix": { "version": "0.1.0", diff --git a/routers/admin/cloudbrains.go b/routers/admin/cloudbrains.go index 91685251b..2002e69f4 100755 --- a/routers/admin/cloudbrains.go +++ b/routers/admin/cloudbrains.go @@ -53,7 +53,7 @@ func CloudBrains(ctx *context.Context) { var jobTypes []string jobTypeNot := false if jobType == string(models.JobTypeBenchmark) { - jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet)) + jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet), string(models.JobTypeSnn4Ecoset)) } else if jobType != "all" && jobType != "" { jobTypes = append(jobTypes, jobType) } diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 4936c2362..ba7346481 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -549,6 +549,8 @@ func RegisterRoutes(m *macaron.Macaron) { }, reqToken()) + m.Get("/compute-nodes", reqToken(), user.GetComputeNodes) + // Notifications m.Group("/notifications", func() 
{ m.Combo(""). @@ -745,7 +747,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/file_notebook", func() { m.Get("", repo.GetFileNoteBookInfo) m.Post("/create", reqToken(), reqWeChat(), bind(api.CreateFileNotebookJobOption{}), repo.CreateFileNoteBook) - + m.Post("/status", reqToken(), bind(api.CreateFileNotebookJobOption{}), repo.FileNoteBookStatus) }) m.Group("/repos", func() { @@ -1024,6 +1026,8 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/query_model_byName", repo.QueryModelByName) m.Get("/query_model_for_predict", repo.QueryModelListForPredict) m.Get("/query_modelfile_for_predict", repo.QueryModelFileForPredict) + m.Get("/query_train_job", repo.QueryTrainJobList) + m.Get("/query_train_job_version", repo.QueryTrainJobVersionList) m.Get("/query_train_model", repo.QueryTrainModelList) m.Post("/create_model_convert", repo.CreateModelConvert) m.Post("/convert_stop", repo.StopModelConvert) diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go index 805443788..250a08423 100755 --- a/routers/api/v1/repo/cloudbrain.go +++ b/routers/api/v1/repo/cloudbrain.go @@ -110,6 +110,9 @@ func GeneralCloudBrainJobStop(ctx *context.APIContext) { func CreateFileNoteBook(ctx *context.APIContext, option api.CreateFileNotebookJobOption) { cloudbrainTask.FileNotebookCreate(ctx.Context, option) } +func FileNoteBookStatus(ctx *context.APIContext, option api.CreateFileNotebookJobOption) { + cloudbrainTask.FileNotebookStatus(ctx.Context, option) +} func GetFileNoteBookInfo(ctx *context.APIContext) { //image description spec description waiting count diff --git a/routers/api/v1/repo/cloudbrain_dashboard.go b/routers/api/v1/repo/cloudbrain_dashboard.go index bb04038b9..1fbef897b 100755 --- a/routers/api/v1/repo/cloudbrain_dashboard.go +++ b/routers/api/v1/repo/cloudbrain_dashboard.go @@ -623,7 +623,7 @@ func GetAllCloudbrainsPeriodDistribution(ctx *context.Context) { } jobTypeList := []string{string(models.JobTypeDebug), string(models.JobTypeTrain), 
string(models.JobTypeInference), string(models.JobTypeBenchmark), - string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet)} + string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet), string(models.JobTypeSnn4Ecoset)} for _, v := range jobTypeList { if _, ok := cloudOneJobTypeRes[v]; !ok { cloudOneJobTypeRes[v] = 0 @@ -645,7 +645,7 @@ func GetAllCloudbrainsPeriodDistribution(ctx *context.Context) { } } - ComputeResourceList := []string{"CPU/GPU", "NPU"} + ComputeResourceList := []string{"CPU/GPU", "NPU", "GCU"} for _, v := range ComputeResourceList { if _, ok := cloudBrainComputeResource[v]; !ok { cloudBrainComputeResource[v] = 0 @@ -687,7 +687,6 @@ func GetCloudbrainsDetailData(ctx *context.Context) { return } recordBeginTime := recordCloudbrain[0].Cloudbrain.CreatedUnix - endTime := time.Now() listType := ctx.Query("listType") jobType := ctx.Query("jobType") jobStatus := ctx.Query("jobStatus") @@ -695,6 +694,33 @@ func GetCloudbrainsDetailData(ctx *context.Context) { aiCenter := ctx.Query("aiCenter") needDeleteInfo := ctx.Query("needDeleteInfo") + accCardType := ctx.Query("accCardType") + accCardsNum := ctx.QueryInt("accCardsNum") + workServerNumber := ctx.QueryInt("workServerNumber") + beginTimeStr := ctx.QueryTrim("beginTime") + endTimeStr := ctx.QueryTrim("endTime") + var beginTimeUnix int64 + var endTimeUnix int64 + if beginTimeStr == "" || endTimeStr == "" { + beginTimeUnix = int64(recordBeginTime) + endTimeUnix = time.Now().Unix() + } else { + beginTime, err := time.ParseInLocation("2006-01-02T15:04:05", beginTimeStr, time.Local) + if err != nil { + log.Error("Can not ParseInLocation.", err) + ctx.Error(http.StatusBadRequest, ctx.Tr("ParseInLocation_get_error")) + return + } + beginTimeUnix = beginTime.Unix() + endTime, err := time.ParseInLocation("2006-01-02T15:04:05", endTimeStr, time.Local) + if err != nil { + log.Error("Can not ParseInLocation.", err) + 
ctx.Error(http.StatusBadRequest, ctx.Tr("ParseInLocation_get_error")) + return + } + endTimeUnix = endTime.Unix() + } + if cloudBrainType == models.TypeCloudBrainOne && aiCenter == models.AICenterOfCloudBrainOne { aiCenter = "" } @@ -730,7 +756,7 @@ func GetCloudbrainsDetailData(ctx *context.Context) { var jobTypes []string jobTypeNot := false if jobType == string(models.JobTypeBenchmark) { - jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet)) + jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet), string(models.JobTypeSnn4Ecoset)) } else if jobType != "all" && jobType != "" { jobTypes = append(jobTypes, jobType) } @@ -753,18 +779,21 @@ func GetCloudbrainsDetailData(ctx *context.Context) { Page: page, PageSize: pageSize, }, - Keyword: keyword, - Type: cloudBrainType, - ComputeResource: listType, - JobTypeNot: jobTypeNot, - JobStatusNot: jobStatusNot, - JobStatus: jobStatuses, - JobTypes: jobTypes, - NeedRepoInfo: true, - BeginTimeUnix: int64(recordBeginTime), - EndTimeUnix: endTime.Unix(), - AiCenter: aiCenter, - NeedDeleteInfo: needDeleteInfo, + Keyword: keyword, + Type: cloudBrainType, + ComputeResource: listType, + JobTypeNot: jobTypeNot, + JobStatusNot: jobStatusNot, + JobStatus: jobStatuses, + JobTypes: jobTypes, + NeedRepoInfo: true, + BeginTimeUnix: beginTimeUnix, + EndTimeUnix: endTimeUnix, + AiCenter: aiCenter, + NeedDeleteInfo: needDeleteInfo, + AccCardType: accCardType, + AccCardsNum: accCardsNum, + WorkServerNumber: workServerNumber, }) if err != nil { ctx.ServerError("Get job failed:", err) @@ -1039,7 +1068,7 @@ func getCloudbrainCount(beginTime time.Time, endTime time.Time, cloudbrains []*m } } - ComputeResourceList := []string{"CPU/GPU", "NPU"} + ComputeResourceList := []string{"CPU/GPU", "NPU", "GCU"} for _, v := range 
ComputeResourceList { if _, ok := cloudBrainComputeResource[v]; !ok { cloudBrainComputeResource[v] = 0 diff --git a/routers/api/v1/repo/modelmanage.go b/routers/api/v1/repo/modelmanage.go index 3b0aed0d5..696231fea 100644 --- a/routers/api/v1/repo/modelmanage.go +++ b/routers/api/v1/repo/modelmanage.go @@ -4,8 +4,10 @@ import ( "net/http" "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/convert" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/storage" + api "code.gitea.io/gitea/modules/structs" routerRepo "code.gitea.io/gitea/routers/repo" ) @@ -54,6 +56,21 @@ func QueryModelListForPredict(ctx *context.APIContext) { routerRepo.QueryModelListForPredict(ctx.Context) } +func QueryTrainJobList(ctx *context.APIContext) { + result, err := routerRepo.QueryTrainJobListApi(ctx.Context) + if err != nil { + log.Info("query error." + err.Error()) + ctx.JSON(http.StatusOK, nil) + } else { + re := make([]*api.Cloudbrain, 0) + for _, task := range result { + conRe := convert.ToCloudBrain(task) + re = append(re, conRe) + } + ctx.JSON(http.StatusOK, re) + } +} + func QueryTrainModelList(ctx *context.APIContext) { result, err := routerRepo.QueryTrainModelFileById(ctx.Context) if err != nil { @@ -63,6 +80,21 @@ func QueryTrainModelList(ctx *context.APIContext) { ctx.JSON(http.StatusOK, re) } +func QueryTrainJobVersionList(ctx *context.APIContext) { + result, err := routerRepo.QueryTrainJobVersionListApi(ctx.Context) + if err != nil { + log.Info("query error." 
+ err.Error()) + ctx.JSON(http.StatusOK, nil) + } else { + re := make([]*api.Cloudbrain, 0) + for _, task := range result { + conRe := convert.ToCloudBrain(task) + re = append(re, conRe) + } + ctx.JSON(http.StatusOK, re) + } +} + func convertFileFormat(result []storage.FileInfo) []FileInfo { re := make([]FileInfo, 0) if result != nil { diff --git a/routers/api/v1/user/repo.go b/routers/api/v1/user/repo.go index 4024bf96c..e9b8c6675 100644 --- a/routers/api/v1/user/repo.go +++ b/routers/api/v1/user/repo.go @@ -5,6 +5,7 @@ package user import ( + "code.gitea.io/gitea/modules/modelarts" "net/http" "code.gitea.io/gitea/models" @@ -146,3 +147,22 @@ func ListOrgRepos(ctx *context.APIContext) { listUserRepos(ctx, ctx.Org.Organization, ctx.IsSigned) } + +func GetComputeNodes(ctx *context.APIContext) { + taskeType := ctx.QueryInt("type") + if taskeType == 2 { + ctx.JSON(http.StatusOK, []int{1}) + } else { + modelarts.InitMultiNode() + if modelarts.MultiNodeConfig != nil { + for _, info := range modelarts.MultiNodeConfig.Info { + if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, ctx.User.ID); isInOrg { + ctx.JSON(http.StatusOK, info.Node) + return + } + } + } + ctx.JSON(http.StatusOK, []int{1}) + } + +} diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 592194371..a074119fc 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -2,7 +2,6 @@ package repo import ( "archive/zip" - "code.gitea.io/gitea/services/repository" "encoding/json" "errors" "fmt" @@ -12,6 +11,8 @@ import ( "regexp" "strings" + "code.gitea.io/gitea/services/repository" + "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" @@ -710,36 +711,42 @@ func downloadFromCloudBrainTwo(path string, task *models.AiModelManage, ctx *con } func QueryTrainJobVersionList(ctx *context.Context) { + + VersionListTasks, err := QueryTrainJobVersionListApi(ctx) + if err != nil { + ctx.JSON(200, nil) + } 
else { + ctx.JSON(200, VersionListTasks) + } +} + +func QueryTrainJobVersionListApi(ctx *context.Context) ([]*models.Cloudbrain, error) { log.Info("query train job version list. start.") JobID := ctx.Query("jobId") if JobID == "" { JobID = ctx.Query("JobId") } - VersionListTasks, count, err := models.QueryModelTrainJobVersionList(JobID) - log.Info("query return count=" + fmt.Sprint(count)) + return VersionListTasks, err +} + +func QueryTrainJobList(ctx *context.Context) { + VersionListTasks, err := QueryTrainJobListApi(ctx) if err != nil { - ctx.ServerError("QueryTrainJobList:", err) + ctx.JSON(200, nil) } else { ctx.JSON(200, VersionListTasks) } } -func QueryTrainJobList(ctx *context.Context) { - log.Info("query train job list. start.") +func QueryTrainJobListApi(ctx *context.Context) ([]*models.Cloudbrain, error) { repoId := ctx.QueryInt64("repoId") - VersionListTasks, count, err := models.QueryModelTrainJobList(repoId) log.Info("query return count=" + fmt.Sprint(count)) - if err != nil { - ctx.ServerError("QueryTrainJobList:", err) - } else { - ctx.JSON(200, VersionListTasks) - } - + return VersionListTasks, err } func QueryTrainModelFileById(ctx *context.Context) ([]storage.FileInfo, error) { diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 905c25a64..cf6df6312 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -285,7 +285,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } var datasetInfos map[string]models.DatasetInfo var datasetNames string - //var + var attachSize int if uuids != "" { datasetInfos, datasetNames, err = models.GetDatasetInfo(uuids) if err != nil { @@ -294,6 +294,18 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } + + if jobType == string(models.JobTypeDebug) { + for _, infos := range datasetInfos { + attachSize += infos.Size + } + if attachSize > 
int(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) // GB + cloudBrainNewDataPrepare(ctx, jobType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tpl, &form) + return + } + } } command := cloudbrain.GetCloudbrainDebugCommand() @@ -387,7 +399,6 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { req.ModelVersion = form.ModelVersion req.PreTrainModelPath = setting.Attachment.Minio.RealPath + form.PreTrainModelUrl req.PreTrainModelUrl = form.PreTrainModelUrl - } _, err = cloudbrain.GenerateTask(req) @@ -735,6 +746,7 @@ func CloudBrainRestart(ctx *context.Context) { }) } + func hasDatasetDeleted(task *models.Cloudbrain) bool { if task.Uuid == "" { return false @@ -843,7 +855,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo if task.JobType == string(models.JobTypeBenchmark) { task.BenchmarkType = ctx.Tr("repo.cloudbrain.benchmark.algorithm") - } else if task.JobType == string(models.JobTypeSnn4imagenet) || task.JobType == string(models.JobTypeBrainScore) { + } else if models.IsModelBenchMarkJobType(task.JobType) { task.BenchmarkType = ctx.Tr("repo.cloudbrain.benchmark.model") task.BenchmarkTypeName = task.JobType ctx.Data["BenchmarkTypeName"] = task.JobType @@ -911,10 +923,13 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo func CloudBrainDebug(ctx *context.Context) { task := ctx.Cloudbrain debugUrl := setting.DebugServerHost + "jpylab_" + task.JobID + "_" + task.SubTaskName - if task.BootFile != "" { - ctx.Redirect(getFileUrl(debugUrl, task.BootFile)) + if ctx.QueryTrim("file") != "" { + ctx.Redirect(getFileUrl(debugUrl, ctx.QueryTrim("file"))) } else { + if task.BootFile != "" { + go cloudbrainTask.UploadNotebookFiles(task) + } ctx.Redirect(debugUrl) } @@ -1638,6 +1653,21 @@ func CloudBrainDownloadModel(ctx *context.Context) { 
ctx.Resp.Header().Set("Cache-Control", "max-age=0") http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) } + +func CloudBrainDownloadMultiModel(ctx *context.Context) { + parentDir := ctx.Query("parentDir") + jobName := ctx.Query("jobName") + filePath := "jobs/" + jobName + "/model/" + parentDir + allFile, err := storage.GetAllObjectByBucketAndPrefixMinio(setting.Attachment.Minio.Bucket, filePath) + if err == nil { + returnFileName := jobName + ".zip" + MinioDownloadManyFile(filePath, ctx, returnFileName, allFile) + } else { + log.Info("error,msg=" + err.Error()) + ctx.ServerError("no file to download.", err) + } +} + func CloudBrainDownloadInferenceResult(ctx *context.Context) { parentDir := ctx.Query("parentDir") fileName := ctx.Query("fileName") @@ -1674,6 +1704,8 @@ func GetRate(ctx *context.Context) { ctx.Redirect(setting.Snn4imagenetServerHost) } else if job.JobType == string(models.JobTypeBrainScore) { ctx.Redirect(setting.BrainScoreServerHost) + } else if job.JobType == string(models.JobTypeSnn4Ecoset) { + ctx.Redirect(setting.Snn4EcosetServerHost) } else { log.Error("JobType error:%s", job.JobType, ctx.Data["msgID"]) } @@ -2146,7 +2178,7 @@ func CloudBrainBenchmarkIndex(ctx *context.Context) { } var jobTypes []string - jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet), string(models.JobTypeModelSafety)) + jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet), string(models.JobTypeSnn4Ecoset), string(models.JobTypeModelSafety)) ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ ListOptions: models.ListOptions{ Page: page, @@ -2179,14 +2211,16 @@ func CloudBrainBenchmarkIndex(ctx *context.Context) { ciTasks[i].BenchmarkTypeName = "" if ciTasks[i].JobType == string(models.JobTypeBenchmark) { ciTasks[i].BenchmarkType = ctx.Tr("repo.cloudbrain.benchmark.algorithm") - } 
else if ciTasks[i].JobType == string(models.JobTypeSnn4imagenet) || ciTasks[i].JobType == string(models.JobTypeBrainScore) { + } else if models.IsModelBenchMarkJobType(ciTasks[i].JobType) { ciTasks[i].BenchmarkType = ctx.Tr("repo.cloudbrain.benchmark.model") ciTasks[i].BenchmarkTypeName = ciTasks[i].JobType if ciTasks[i].JobType == string(models.JobTypeSnn4imagenet) { ciTasks[i].BenchmarkTypeRankLink = setting.Snn4imagenetServerHost - } else { + } else if ciTasks[i].JobType == string(models.JobTypeBrainScore) { ciTasks[i].BenchmarkTypeRankLink = setting.BrainScoreServerHost + } else { + ciTasks[i].BenchmarkTypeRankLink = setting.Snn4EcosetServerHost } } @@ -2536,7 +2570,6 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) displayJobName := form.DisplayJobName jobName := util.ConvertDisplayJobNameToJobName(displayJobName) image := form.Image - uuid := form.Attachment jobType := form.JobType codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath branchName := cloudbrain.DefaultBranchName @@ -2578,7 +2611,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) return } - if jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) { + if !models.IsModelBenchMarkJobType(jobType) { log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("jobtype error", tpl, &form) @@ -2607,29 +2640,41 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) mkModelPath(modelPath) uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/") - snn4imagenetPath := setting.JobPath + jobName + cloudbrain.Snn4imagenetMountPath + benchmarkPath := setting.JobPath + jobName + cloudbrain.BenchMarkMountPath if setting.IsSnn4imagenetEnabled && jobType == string(models.JobTypeSnn4imagenet) { - downloadRateCode(repo, jobName, setting.Snn4imagenetOwner, setting.Snn4imagenetName, snn4imagenetPath, "", "", 
ctx.User.Name) - uploadCodeToMinio(snn4imagenetPath+"/", jobName, cloudbrain.Snn4imagenetMountPath+"/") - command = fmt.Sprintf(cloudbrain.Snn4imagenetCommand, displayJobName, trimSpaceNewlineInString(form.Description)) + downloadRateCode(repo, jobName, setting.Snn4imagenetOwner, setting.Snn4imagenetName, benchmarkPath, "", "", ctx.User.Name) + uploadCodeToMinio(benchmarkPath+"/", jobName, cloudbrain.BenchMarkMountPath+"/") + command = fmt.Sprintf(cloudbrain.Snn4imagenetCommand, displayJobName, form.CkptName, trimSpaceNewlineInString(form.Description)) } benchmarkChildTypeID := 0 - brainScorePath := setting.JobPath + jobName + cloudbrain.BrainScoreMountPath + if setting.IsBrainScoreEnabled && jobType == string(models.JobTypeBrainScore) { - downloadRateCode(repo, jobName, setting.BrainScoreOwner, setting.BrainScoreName, brainScorePath, "", "", ctx.User.Name) - uploadCodeToMinio(brainScorePath+"/", jobName, cloudbrain.BrainScoreMountPath+"/") + downloadRateCode(repo, jobName, setting.BrainScoreOwner, setting.BrainScoreName, benchmarkPath, "", "", ctx.User.Name) + uploadCodeToMinio(benchmarkPath+"/", jobName, cloudbrain.BenchMarkMountPath+"/") benchmarkChildTypeID = form.BenchmarkChildTypeID - command = fmt.Sprintf(cloudbrain.BrainScoreCommand, getBrainRegion(benchmarkChildTypeID), displayJobName, trimSpaceNewlineInString(form.Description)) + command = fmt.Sprintf(cloudbrain.BrainScoreCommand, getBrainRegion(benchmarkChildTypeID), displayJobName, form.CkptName, trimSpaceNewlineInString(form.Description)) } + var uuid string + var datasetInfos map[string]models.DatasetInfo + var datasetNames string + if setting.IsSnn4EcosetEnabled && jobType == string(models.JobTypeSnn4Ecoset) { + downloadRateCode(repo, jobName, setting.Snn4EcosetOwner, setting.Snn4EcosetName, benchmarkPath, "", "", ctx.User.Name) + uploadCodeToMinio(benchmarkPath+"/", jobName, cloudbrain.BenchMarkMountPath+"/") + command = fmt.Sprintf(cloudbrain.Snn4EcosetCommand, displayJobName, form.CkptName, 
trimSpaceNewlineInString(form.Description)) + + attachment, err := getEcosetAttachment() + if err != nil { + log.Error("load benchmark code failed", err) + cloudBrainNewDataPrepare(ctx, jobType) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) + return + } + uuid = attachment.UUID + datasetInfos, datasetNames, _ = models.GetDatasetInfo(uuid) - datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) - if err != nil { - log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx, jobType) - ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) - return } + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ JobType: models.JobTypeBenchmark, ComputeResource: models.GPU, @@ -2661,8 +2706,6 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), - Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), - BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), JobType: jobType, Description: form.Description, BranchName: branchName, @@ -2674,6 +2717,14 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), Spec: spec, } + if form.ModelName != "" { + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainModelPath = setting.Attachment.Minio.RealPath + form.PreTrainModelUrl + req.PreTrainModelUrl = form.PreTrainModelUrl + } _, err = cloudbrain.GenerateTask(req) if err != nil { @@ -2685,6 +2736,21 @@ func ModelBenchmarkCreate(ctx *context.Context, form 
auth.CreateCloudBrainForm) ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark") } +func getEcosetAttachment() (*models.Attachment, error) { + ecosetRepo, err := models.GetRepositoryByOwnerAndName(setting.Snn4EcosetOwner, setting.Snn4EcosetName) + if err != nil { + return nil, err + } + + datasetInfo, err := models.GetDatasetByRepo(ecosetRepo) + if err != nil { + return nil, err + } + + return models.GetAttachmentByDatasetIdFileName(setting.Snn4AttachmentName, datasetInfo.ID) + +} + func getBrainRegion(benchmarkChildTypeID int) string { values := []string{"V1", "V2", "V4", "IT"} return values[benchmarkChildTypeID] @@ -2745,18 +2811,24 @@ func InferenceCloudBrainJobShow(ctx *context.Context) { cloudBrainShow(ctx, tplCloudBrainInferenceJobShow, models.JobTypeInference) } -func DownloadInferenceResultFile(ctx *context.Context) { +func DownloadGPUInferenceResultFile(ctx *context.Context) { var jobID = ctx.Params(":jobid") - var versionName = ctx.Query("version_name") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + task, err := models.GetCloudbrainByJobID(jobID) if err != nil { log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) return } - - allFile, err := storage.GetAllObjectByBucketAndPrefixMinio(setting.Attachment.Minio.Bucket, task.ResultUrl) - returnFileName := task.DisplayJobName + ".zip" - MinioDownloadManyFile(task.ResultUrl, ctx, returnFileName, allFile) + parentDir := ctx.Query("parentDir") + filePath := "jobs/" + task.JobName + "/result/" + parentDir + log.Info("prefix=" + filePath) + allFile, err := storage.GetAllObjectByBucketAndPrefixMinio(setting.Attachment.Minio.Bucket, filePath) + if err == nil { + returnFileName := task.DisplayJobName + ".zip" + MinioDownloadManyFile(filePath, ctx, returnFileName, allFile) + } else { + log.Info("error,msg=" + err.Error()) + ctx.ServerError("no file to download.", err) + } } func getInferenceJobCommand(form auth.CreateCloudBrainInferencForm) 
(string, error) { diff --git a/routers/repo/dataset.go b/routers/repo/dataset.go index 1f445492d..32c884b5e 100755 --- a/routers/repo/dataset.go +++ b/routers/repo/dataset.go @@ -373,6 +373,7 @@ func datasetMultiple(ctx *context.Context, opts *models.SearchDatasetOptions) { } data, err := json.Marshal(datasets) + log.Info("datakey", string(data)) if err != nil { log.Error("json.Marshal failed:", err.Error()) ctx.JSON(200, map[string]string{ diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 14db1a50d..d33fce106 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1239,7 +1239,7 @@ func GrampusTrainJobShow(ctx *context.Context) { return } task.ContainerIp = "" - + task.User, _ = models.GetUserByID(task.UserID) if task.DeletedAt.IsZero() { //normal record result, err := grampus.GetJob(task.JobID) if err != nil { @@ -1308,6 +1308,7 @@ func GrampusTrainJobShow(ctx *context.Context) { taskList := make([]*models.Cloudbrain, 0) taskList = append(taskList, task) prepareSpec4Show(ctx, task) + ctx.Data["version_list_task"] = taskList ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) @@ -1358,6 +1359,23 @@ func GrampusGetLog(ctx *context.Context) { }) return } + result, err := grampus.GetJob(jobID) + if err != nil { + log.Error("GetJob(%s) failed:%v", job.JobName, err) + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobName": job.JobName, + "Content": content, + "CanLogDownload": false, + }) + return + } + if result != nil { + job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) + if job.Status == models.GrampusStatusFailed { + content = content + "\n" + result.ExitDiagnostics + } + } + canLogDownload := err == nil && job.IsUserHasRight(ctx.User) ctx.JSON(http.StatusOK, map[string]interface{}{ "JobName": job.JobName, @@ -1682,6 +1700,10 @@ func GrampusNotebookRestart(ctx *context.Context) { if res.GrampusResult.ErrorCode != 
0 || res.NewId == "" { log.Error("ManageNotebook2 failed:" + res.GrampusResult.ErrorMsg) errorMsg = ctx.Tr("repo.debug_again_fail") + if res.GrampusResult.ErrorCode == 5005 { + errorMsg = ctx.Tr("repo.debug_again_fail_forever") + } + break } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 3dbe101a8..e0b9cd1b6 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -218,6 +218,22 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm return } } + + var datasetInfos map[string]models.DatasetInfo + var attachSize int + if uuid != "" { + datasetInfos, _, err = models.GetDatasetInfo(uuid) + for _, infos := range datasetInfos { + attachSize += infos.Size + } + if attachSize > int(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) //GB + notebookNewDataPrepare(ctx) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tplModelArtsNotebookNew, &form) + return + } + } + var aiCenterCode = models.AICenterOfCloudBrainTwo if setting.ModelartsCD.Enabled { aiCenterCode = models.AICenterOfChengdu @@ -439,9 +455,13 @@ func NotebookDebug2(ctx *context.Context) { ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil) return } - if task.BootFile != "" { - ctx.Redirect(getFileUrl(result.Url, task.BootFile) + "?token=" + result.Token) + + if ctx.QueryTrim("file") != "" { + ctx.Redirect(getFileUrl(result.Url, ctx.QueryTrim("file")) + "?token=" + result.Token) } else { + if task.BootFile != "" { + go cloudbrainTask.UploadNotebookFiles(task) + } ctx.Redirect(result.Url + "?token=" + result.Token) } @@ -463,7 +483,7 @@ func getFileUrl(url string, filename string) string { } } - return url + middle + path.Base(filename) + return url + middle + filename } func NotebookRestart(ctx *context.Context) { @@ -630,7 +650,7 @@ func NotebookStop(ctx *context.Context) { if task.Status != 
string(models.ModelArtsRunning) { log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"]) resultCode = "-1" - errorMsg = "the job is not running" + errorMsg = ctx.Tr("cloudbrain.Already_stopped") break } @@ -2589,7 +2609,8 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel ctx.Data["datasetType"] = models.TypeCloudBrainTwo waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") ctx.Data["WaitCount"] = waitCount - + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeInference)) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount return nil } func InferenceJobShow(ctx *context.Context) { @@ -2653,6 +2674,46 @@ func InferenceJobShow(ctx *context.Context) { ctx.HTML(http.StatusOK, tplModelArtsInferenceJobShow) } +func MultiModelDownload(ctx *context.Context) { + var ( + err error + ) + jobID := ctx.Params(":jobid") + versionName := ctx.Query("version_name") + parentDir := ctx.Query("parent_dir") + + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", task.JobName, err.Error()) + return + } + + if task.ComputeResource == models.NPUResource { + path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName, parentDir), "/") + path = strings.TrimSuffix(path, "/") + path += "/" + allFile, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, path) + if err == nil { + returnFileName := task.DisplayJobName + ".zip" + ObsDownloadManyFile(path, ctx, returnFileName, allFile) + } else { + log.Info("error,msg=" + err.Error()) + ctx.ServerError("no file to download.", err) + } + } else if task.ComputeResource == models.GPUResource { + filePath := setting.CBCodePathPrefix + task.JobName + cloudbrain.ModelMountPath + "/" + parentDir + allFile, err := 
storage.GetAllObjectByBucketAndPrefixMinio(setting.Attachment.Minio.Bucket, filePath) + if err == nil { + returnFileName := task.DisplayJobName + ".zip" + MinioDownloadManyFile(filePath, ctx, returnFileName, allFile) + } else { + log.Info("error,msg=" + err.Error()) + ctx.ServerError("no file to download.", err) + } + } + +} + func ModelDownload(ctx *context.Context) { var ( err error diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 063a20999..85e1b8a25 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1186,6 +1186,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/rate", reqRepoCloudBrainReader, repo.GetRate) m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels) m.Get("/download_model", cloudbrain.AdminOrJobCreaterRight, repo.CloudBrainDownloadModel) + m.Get("/download_multi_model", cloudbrain.AdminOrJobCreaterRight, repo.CloudBrainDownloadMultiModel) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.CloudBrainNew) m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate) @@ -1209,6 +1210,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainTrainJobDel) //m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels) m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainDownloadModel) + m.Get("/download_multi_model", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainDownloadMultiModel) //m.Get("/get_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo.GetLogFromModelDir) //m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.CloudBrainTrainJobVersionNew) @@ -1221,8 +1223,8 @@ 
func RegisterRoutes(m *macaron.Macaron) { m.Group("/:jobid", func() { m.Get("", reqRepoCloudBrainReader, repo.InferenceCloudBrainJobShow) m.Get("/result_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.CloudBrainDownloadInferenceResult) - - m.Get("/downloadall", repo.DownloadInferenceResultFile) + m.Get("/download_multi_model", cloudbrain.AdminOrJobCreaterRightForTrain, repo.CloudBrainDownloadMultiModel) + m.Get("/downloadall", cloudbrain.AdminOrJobCreaterRightForTrain, repo.DownloadGPUInferenceResultFile) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.InferenceCloudBrainJobNew) m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainInferencForm{}), repo.CloudBrainInferenceJobCreate) @@ -1248,6 +1250,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusStopJob) m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel) m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload) + m.Get("/download_multi_model", cloudbrain.AdminOrJobCreaterRightForTrain, repo.MultiModelDownload) m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.GrampusTrainJobVersionNew) m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobVersionCreate) }) @@ -1333,6 +1336,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.TrainJobStop) m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.TrainJobDel) m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload) + m.Get("/download_multi_model", cloudbrain.AdminOrJobCreaterRightForTrain, repo.MultiModelDownload) m.Get("/download_log_file", cloudbrain.AdminOrJobCreaterRightForTrain, 
repo.TrainJobDownloadLogFile) m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, context.PointAccount(), repo.TrainJobNewVersion) m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) @@ -1348,7 +1352,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/:jobid", func() { m.Get("", reqRepoCloudBrainReader, repo.InferenceJobShow) m.Get("/result_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ResultDownload) - m.Get("/downloadall", repo.DownloadMultiResultFile) + m.Get("/downloadall", cloudbrain.AdminOrJobCreaterRightForTrain, repo.DownloadMultiResultFile) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.InferenceJobNew) m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) diff --git a/routers/user/home.go b/routers/user/home.go index 62b0357ad..e4ff50a45 100755 --- a/routers/user/home.go +++ b/routers/user/home.go @@ -779,7 +779,7 @@ func Cloudbrains(ctx *context.Context) { var jobTypes []string jobTypeNot := false if jobType == string(models.JobTypeBenchmark) { - jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet)) + jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeModelSafety), string(models.JobTypeBrainScore), string(models.JobTypeSnn4imagenet), string(models.JobTypeSnn4Ecoset)) } else if jobType != "all" && jobType != "" { jobTypes = append(jobTypes, jobType) } diff --git a/services/cloudbrain/clear.go b/services/cloudbrain/clear.go index 44613ee3c..e14414c74 100644 --- a/services/cloudbrain/clear.go +++ b/services/cloudbrain/clear.go @@ -14,21 +14,21 @@ import ( func ClearCloudbrainResultSpace() { log.Info("clear cloudbrain one result space begin.") 
- if !setting.ClearStrategy.Enabled{ + if !setting.ClearStrategy.Enabled { return } - tasks, err := models.GetCloudBrainOneStoppedNotDebugJobDaysAgo(setting.ClearStrategy.ResultSaveDays, setting.ClearStrategy.BatchSize) + tasks, err := models.GetGPUStoppedNotDebugJobDaysAgo(setting.ClearStrategy.ResultSaveDays, setting.ClearStrategy.BatchSize) if err != nil { log.Warn("Failed to get cloudbrain, clear result failed.", err) return } - debugTasks, err := models.GetCloudBrainOneStoppedDebugJobDaysAgo(setting.ClearStrategy.ResultSaveDays, setting.ClearStrategy.DebugJobSize) + debugTasks, err := models.GetGPUStoppedDebugJobDaysAgo(setting.ClearStrategy.ResultSaveDays, setting.ClearStrategy.DebugJobSize) if err != nil { log.Warn("Failed to get debug cloudbrain.", err) } - tasks=append(tasks,debugTasks...) + tasks = append(tasks, debugTasks...) if err != nil { log.Warn("Failed to get cloudbrain, clear result failed.", err) @@ -38,7 +38,7 @@ func ClearCloudbrainResultSpace() { for _, task := range tasks { err := DeleteCloudbrainOneJobStorage(task.JobName) if err == nil { - log.Info("clear job in cloudbrain table:"+task.JobName) + log.Info("clear job in cloudbrain table:" + task.JobName) ids = append(ids, task.ID) } } @@ -69,10 +69,10 @@ func clearMinioHistoryTrashFile() { SortModTimeAscend(miniofiles) for _, file := range miniofiles { - if file.Name()!="" && file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { + if file.Name() != "" && file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { - has,err:=models.IsCloudbrainExistByJobName(file.Name()) - if err==nil && !has { + has, err := models.IsCloudbrainExistByJobName(file.Name()) + if err == nil && !has { dirPath := setting.CBCodePathPrefix + file.Name() + "/" log.Info("clear job in minio trash:" + file.Name()) storage.Attachments.DeleteDir(dirPath) @@ -90,7 +90,7 @@ func clearMinioHistoryTrashFile() { } } -func clearLocalHistoryTrashFile() { +func 
clearLocalHistoryTrashFile() { files, err := ioutil.ReadDir(setting.JobPath) processCount := 0 if err != nil { @@ -99,11 +99,11 @@ func clearLocalHistoryTrashFile() { SortModTimeAscend(files) for _, file := range files { //清理n天前的历史垃圾数据,清理job目录 - if file.Name()!="" && file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { - has,err:=models.IsCloudbrainExistByJobName(file.Name()) - if err==nil && !has{ + if file.Name() != "" && file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) { + has, err := models.IsCloudbrainExistByJobName(file.Name()) + if err == nil && !has { os.RemoveAll(setting.JobPath + file.Name()) - log.Info("clear job in local trash:"+file.Name()) + log.Info("clear job in local trash:" + file.Name()) processCount++ } if processCount == setting.ClearStrategy.BatchSize { @@ -127,7 +127,7 @@ func SortModTimeAscend(files []os.FileInfo) { func DeleteCloudbrainOneJobStorage(jobName string) error { - if jobName==""{ + if jobName == "" { return nil } //delete local diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index 4ae742c3a..172fa1502 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -34,7 +34,7 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s ComputeResource: models.GPUResource, }, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { CloudBrainTypes: []int{models.TypeCloudBrainOne}, - JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeBrainScore, models.JobTypeSnn4imagenet}, + JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeBrainScore, models.JobTypeSnn4imagenet, models.JobTypeSnn4Ecoset}, NotFinalStatuses: CloudbrainOneNotFinalStatuses, ComputeResource: models.GPUResource, }, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { @@ -76,7 +76,7 @@ var 
StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { jobNewType := jobType - if jobType == string(models.JobTypeSnn4imagenet) || jobType == string(models.JobTypeBrainScore) { + if models.IsModelBenchMarkJobType(jobType) { jobNewType = string(models.JobTypeBenchmark) } diff --git a/services/cloudbrain/cloudbrainTask/notebook.go b/services/cloudbrain/cloudbrainTask/notebook.go index cc9563520..3526f6549 100644 --- a/services/cloudbrain/cloudbrainTask/notebook.go +++ b/services/cloudbrain/cloudbrainTask/notebook.go @@ -4,6 +4,9 @@ import ( "fmt" "net/http" "path" + "strings" + + "code.gitea.io/gitea/modules/notebook" "code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/modelarts_cd" @@ -29,6 +32,9 @@ import ( ) const NoteBookExtension = ".ipynb" +const CPUType = 0 +const GPUType = 1 +const NPUType = 2 func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption) { @@ -66,7 +72,7 @@ func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOp } //create repo if not exist - repo, err := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName) + repo, _ := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName) if repo == nil { repo, err = repo_service.CreateRepository(ctx.User, ctx.User, models.CreateRepoOptions{ Name: setting.FileNoteBook.ProjectName, @@ -80,19 +86,222 @@ func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOp AutoInit: true, DefaultBranch: "master", }) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.failed_to_create_notebook_repo", setting.FileNoteBook.ProjectName))) + return + } + } else { + + noteBook, _ := models.GetWaitOrRunFileNotebookByRepo(repo.ID, getCloudbrainType(option.Type)) + if noteBook != nil { + + if isRepoConfilcts(option, noteBook) { + 
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_repo_conflict"))) + return + } + + if isNotebookSpecMath(option, noteBook) { + if !isRepoMatch(option, noteBook) { + err = downloadCode(sourceRepo, getCodePath(noteBook.JobName, sourceRepo), option.BranchName) + if err != nil { + log.Error("download code failed", err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + } + if !isRepoFileMatch(option, noteBook) { + noteBook.BootFile += ";" + getBootFile(option.File, option.OwnerName, option.ProjectName) + noteBook.BranchName += ";" + option.BranchName + noteBook.Description += ";" + getDescription(option) + + err := models.UpdateJob(noteBook) + if err != nil { + log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + } + + ctx.JSON(http.StatusOK, models.BaseMessageApi{ + Code: 0, + Message: noteBook.JobID, + }) + return + } + + } + } + + if option.Type <= GPUType { + cloudBrainFileNoteBookCreate(ctx, option, repo, sourceRepo) + } else { + modelartsFileNoteBookCreate(ctx, option, repo, sourceRepo) + } + +} +func FileNotebookStatus(ctx *context.Context, option api.CreateFileNotebookJobOption) { + if ctx.Written() { + return } + + if path.Ext(option.File) != NoteBookExtension { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_select_wrong"))) + return + } + + isNotebookFileExist, _ := isNoteBookFileExist(ctx, option) + if !isNotebookFileExist { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist"))) + return + } + + task, err := models.GetCloudbrainByJobID(option.JobId) if err != nil { - ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.failed_to_create_notebook_repo", setting.FileNoteBook.ProjectName))) + log.Error("job not found:"+option.JobId, err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Job id may not be 
right. can not find job.")) return } - if option.Type <= 1 { - cloudBrainFileNoteBookCreate(ctx, option, repo, sourceRepo) + if task.BootFile == "" || task.Status != string(models.ModelArtsRunning) { + log.Warn("Boot file is empty or status is running. ") + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Boot file is empty or status is running.")) + return + } + if !isRepoFileMatch(option, task) { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("can not math repo file.")) + return + } + debugBaseUrl, token, err := getBaseUrlAndToken(task) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + + if uploadNotebookFileIfCannotBroswer(debugBaseUrl, getBootFile(option.File, option.OwnerName, option.ProjectName), task, token) { + ctx.JSON(http.StatusOK, models.BaseOKMessageApi) } else { - modelartsFileNoteBookCreate(ctx, option, repo, sourceRepo) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("upload failed.")) + + } + +} + +func getBaseUrlAndToken(task *models.Cloudbrain) (string, string, error) { + var debugBaseUrl string + var token string + if task.Type == models.TypeCloudBrainOne { + debugBaseUrl = setting.DebugServerHost + "jpylab_" + task.JobID + "_" + task.SubTaskName + "/lab" + + } else { + var result *models.GetNotebook2Result + var err error + if task.Type == models.TypeCloudBrainTwo { + result, err = modelarts.GetNotebook2(task.JobID) + } else if task.Type == models.TypeCDCenter { + result, err = modelarts_cd.GetNotebook(task.JobID) + } + if err != nil || result == nil || result.Status != string(models.ModelArtsRunning) || result.Url == "" { + log.Error("notebook job not found:"+task.JobID, err) + return "", "", fmt.Errorf("can not get job or job is invalid.") + } + + debugBaseUrl = result.Url + token = result.Token + } + return debugBaseUrl, token, nil +} + +func uploadNotebookFileIfCannotBroswer(debugBaseUrl string, bootFile string, task *models.Cloudbrain, token string) bool { + c := 
&notebook.NotebookContent{ + Url: debugBaseUrl, + Path: bootFile, + PathType: "file", + Token: token, + } + if c.IsNotebookFileCanBrowser() { + return true + } else { + c.SetCookiesAndCsrf() + c.UploadNoteBookFile(task) + return c.IsNotebookFileCanBrowser() + } + +} + +func isNotebookSpecMath(option api.CreateFileNotebookJobOption, book *models.Cloudbrain) bool { + if option.Type == NPUType || option.Type == CPUType { + return true + } + spec, err := models.GetCloudbrainSpecByID(book.ID) + if err != nil { + log.Warn("can not get spec ", err) + return false + } + return spec.AccCardsNum > 0 +} + +func isRepoConfilcts(option api.CreateFileNotebookJobOption, book *models.Cloudbrain) bool { + bootFiles := strings.Split(book.BootFile, ";") + branches := strings.Split(book.BranchName, ";") + + for i, bootFile := range bootFiles { + splits := strings.Split(bootFile, "/") + if len(splits) >= 3 { + if splits[0] == option.OwnerName && splits[1] == option.ProjectName && branches[i] != option.BranchName { + return true + } + } + } + + return false } +func isRepoMatch(option api.CreateFileNotebookJobOption, book *models.Cloudbrain) bool { + bootFiles := strings.Split(book.BootFile, ";") + + for _, bootFile := range bootFiles { + splits := strings.Split(bootFile, "/") + if len(splits) >= 3 { + if splits[0] == option.OwnerName && splits[1] == option.ProjectName { + return true + } + } + } + return false + +} + +func isRepoFileMatch(option api.CreateFileNotebookJobOption, book *models.Cloudbrain) bool { + bootFiles := strings.Split(book.BootFile, ";") + branches := strings.Split(book.BranchName, ";") + + for i, bootFile := range bootFiles { + if branches[i] == option.BranchName && getBootFile(option.File, option.OwnerName, option.ProjectName) == bootFile { + return true + } + } + + return false + +} +func UploadNotebookFiles(task *models.Cloudbrain) { + if task.Status == string(models.JobRunning) && task.BootFile != "" { + + debugBaseUrl, token, err := getBaseUrlAndToken(task) + if 
err != nil { + log.Error("can not get base url:", err) + return + } + bootFiles := strings.Split(task.BootFile, ";") + + for _, bootFile := range bootFiles { + uploadNotebookFileIfCannotBroswer(debugBaseUrl, bootFile, task, token) + } + + } +} + func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository, sourceRepo *models.Repository) { displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name) @@ -131,17 +340,18 @@ func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNot } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - ctx.JSON(http.StatusOK,models.BaseMessageApi{ - Code: 2, + ctx.JSON(http.StatusOK, models.BaseMessageApi{ + Code: 2, Message: ctx.Tr("repo.cloudbrain.morethanonejob"), }) return } } - errStr := uploadCodeFile(sourceRepo, getCodePath(jobName), option.BranchName, option.File, jobName) - if errStr != "" { - ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist"))) + err = downloadCode(sourceRepo, getCodePath(jobName, sourceRepo), option.BranchName) + if err != nil { + log.Error("download code failed", err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) return } command := cloudbrain.GetCloudbrainDebugCommand() @@ -185,7 +395,7 @@ func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNot JobType: jobType, Description: getDescription(option), BranchName: option.BranchName, - BootFile: option.File, + BootFile: getBootFile(option.File, option.OwnerName, option.ProjectName), Params: "{\"parameter\":[]}", CommitID: "", BenchmarkTypeID: 0, @@ -206,8 +416,18 @@ func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNot } -func getCodePath(jobName string) string { - return setting.JobPath + jobName + cloudbrain.CodeMountPath +func getCloudbrainType(optionType int) int { + if optionType < 1 { 
+ return models.TypeCloudBrainOne + } + if setting.ModelartsCD.Enabled { + return models.TypeCDCenter + } + return models.TypeCloudBrainTwo +} + +func getCodePath(jobName string, repo *models.Repository) string { + return setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" + repo.OwnerName + "/" + repo.Name } func getDescription(option api.CreateFileNotebookJobOption) string { @@ -237,8 +457,8 @@ func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNote } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - ctx.JSON(http.StatusOK,models.BaseMessageApi{ - Code: 2, + ctx.JSON(http.StatusOK, models.BaseMessageApi{ + Code: 2, Message: ctx.Tr("repo.cloudbrain.morethanonejob"), }) return @@ -260,7 +480,7 @@ func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNote } } - err = downloadCode(sourceRepo, getCodePath(jobName), option.BranchName) + err = downloadCode(sourceRepo, getCodePath(jobName, sourceRepo), option.BranchName) if err != nil { ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) return @@ -297,8 +517,9 @@ func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNote Description: getDescription(option), ImageId: setting.FileNoteBook.ImageIdNPU, Spec: spec, - BootFile: "", + BootFile: getBootFile(option.File, option.OwnerName, option.ProjectName), AutoStopDurationMs: modelarts.AutoStopDurationMs / 4, + BranchName: option.BranchName, } if setting.ModelartsCD.Enabled { @@ -347,17 +568,8 @@ func isNoteBookFileExist(ctx *context.Context, option api.CreateFileNotebookJobO return true, nil } -func uploadCodeFile(repo *models.Repository, codePath string, branchName string, filePath string, jobName string) string { - err := downloadCode(repo, codePath, branchName) - if err != nil { - return "cloudbrain.load_code_failed" - } - - err = uploadOneFileToMinio(codePath, filePath, jobName, 
cloudbrain.CodeMountPath+"/") - if err != nil { - return "cloudbrain.load_code_failed" - } - return "" +func getBootFile(filePath string, ownerName string, projectName string) string { + return ownerName + "/" + projectName + "/" + filePath } func fileExists(gitRepo *git.Repository, path string, branch string) (bool, error) { diff --git a/templates/custom/task_wait_count.tmpl b/templates/custom/task_wait_count.tmpl index fb8ee71fb..997a0f1c3 100644 --- a/templates/custom/task_wait_count.tmpl +++ b/templates/custom/task_wait_count.tmpl @@ -1,7 +1,7 @@