diff --git a/models/ai_model_manage.go b/models/ai_model_manage.go index d9adda2dc..0d754b0ba 100644 --- a/models/ai_model_manage.go +++ b/models/ai_model_manage.go @@ -12,67 +12,67 @@ import ( ) type AiModelManage struct { - ID string `xorm:"pk"` - Name string `xorm:"INDEX NOT NULL"` - Version string `xorm:"NOT NULL"` - VersionCount int `xorm:"NOT NULL DEFAULT 0"` - New int `xorm:"NOT NULL"` - Type int `xorm:"NOT NULL"` - Size int64 `xorm:"NOT NULL"` - Description string `xorm:"varchar(2000)"` - Label string `xorm:"varchar(1000)"` - Path string `xorm:"varchar(400) NOT NULL"` - DownloadCount int `xorm:"NOT NULL DEFAULT 0"` - Engine int64 `xorm:"NOT NULL DEFAULT 0"` - Status int `xorm:"NOT NULL DEFAULT 0"` - StatusDesc string `xorm:"varchar(500)"` - Accuracy string `xorm:"varchar(1000)"` - AttachmentId string `xorm:"NULL"` - RepoId int64 `xorm:"INDEX NULL"` - CodeBranch string `xorm:"varchar(400) NULL"` - CodeCommitID string `xorm:"NULL"` - UserId int64 `xorm:"NOT NULL"` - UserName string - UserRelAvatarLink string - TrainTaskInfo string `xorm:"text NULL"` - CreatedUnix timeutil.TimeStamp `xorm:"created"` - UpdatedUnix timeutil.TimeStamp `xorm:"updated"` - IsCanOper bool - IsCanDelete bool + ID string `xorm:"pk" json:"id"` + Name string `xorm:"INDEX NOT NULL" json:"name"` + Version string `xorm:"NOT NULL" json:"version"` + VersionCount int `xorm:"NOT NULL DEFAULT 0" json:"versionCount"` + New int `xorm:"NOT NULL" json:"new"` + Type int `xorm:"NOT NULL" json:"type"` + Size int64 `xorm:"NOT NULL" json:"size"` + Description string `xorm:"varchar(2000)" json:"description"` + Label string `xorm:"varchar(1000)" json:"label"` + Path string `xorm:"varchar(400) NOT NULL" json:"path"` + DownloadCount int `xorm:"NOT NULL DEFAULT 0" json:"downloadCount"` + Engine int64 `xorm:"NOT NULL DEFAULT 0" json:"engine"` + Status int `xorm:"NOT NULL DEFAULT 0" json:"status"` + StatusDesc string `xorm:"varchar(500)" json:"statusDesc"` + Accuracy string `xorm:"varchar(1000)" 
json:"accuracy"` + AttachmentId string `xorm:"NULL" json:"attachmentId"` + RepoId int64 `xorm:"INDEX NULL" json:"repoId"` + CodeBranch string `xorm:"varchar(400) NULL" json:"codeBranch"` + CodeCommitID string `xorm:"NULL" json:"codeCommitID"` + UserId int64 `xorm:"NOT NULL" json:"userId"` + UserName string `json:"userName"` + UserRelAvatarLink string `json:"userRelAvatarLink"` + TrainTaskInfo string `xorm:"text NULL" json:"trainTaskInfo"` + CreatedUnix timeutil.TimeStamp `xorm:"created" json:"createdUnix"` + UpdatedUnix timeutil.TimeStamp `xorm:"updated" json:"updatedUnix"` + IsCanOper bool `json:"isCanOper"` + IsCanDelete bool `json:"isCanDelete"` } type AiModelConvert struct { - ID string `xorm:"pk"` - Name string `xorm:"INDEX NOT NULL"` - Status string `xorm:"NULL"` - StatusResult string `xorm:"NULL"` - SrcEngine int `xorm:"NOT NULL DEFAULT 0"` - RepoId int64 `xorm:"INDEX NULL"` - ModelId string `xorm:"NOT NULL"` - ModelName string `xorm:"NULL"` - ModelVersion string `xorm:"NOT NULL"` - ModelPath string `xorm:"NULL"` - DestFormat int `xorm:"NOT NULL DEFAULT 0"` - NetOutputFormat int `xorm:"NULL"` - UserId int64 `xorm:"NOT NULL"` - CloudBrainTaskId string `xorm:"NULL"` - ModelArtsVersionId string `xorm:"NULL"` - ContainerID string - ContainerIp string - RunTime int64 `xorm:"NULL"` - TrainJobDuration string - InputShape string `xorm:"varchar(2000)"` - InputDataFormat string `xorm:"NOT NULL"` - Description string `xorm:"varchar(2000)"` - Path string `xorm:"varchar(400) NOT NULL"` - CreatedUnix timeutil.TimeStamp `xorm:"created"` - UpdatedUnix timeutil.TimeStamp `xorm:"updated"` - StartTime timeutil.TimeStamp - EndTime timeutil.TimeStamp - UserName string - UserRelAvatarLink string - IsCanOper bool - IsCanDelete bool + ID string `xorm:"pk" json:"id"` + Name string `xorm:"INDEX NOT NULL" json:"name"` + Status string `xorm:"NULL" json:"status"` + StatusResult string `xorm:"NULL" json:"statusResult"` + SrcEngine int `xorm:"NOT NULL DEFAULT 0" json:"srcEngine"` + RepoId 
int64 `xorm:"INDEX NULL" json:"repoId"` + ModelId string `xorm:"NOT NULL" json:"modelId"` + ModelName string `xorm:"NULL" json:"modelName"` + ModelVersion string `xorm:"NOT NULL" json:"modelVersion"` + ModelPath string `xorm:"NULL" json:"modelPath"` + DestFormat int `xorm:"NOT NULL DEFAULT 0" json:"destFormat"` + NetOutputFormat int `xorm:"NULL" json:"netOutputFormat"` + UserId int64 `xorm:"NOT NULL" json:"userId"` + CloudBrainTaskId string `xorm:"NULL" json:"cloudBrainTaskId"` + ModelArtsVersionId string `xorm:"NULL" json:"modelArtsVersionId"` + ContainerID string `json:"containerID"` + ContainerIp string `json:"containerIp"` + RunTime int64 `xorm:"NULL" json:"runTime"` + TrainJobDuration string `json:"trainJobDuration"` + InputShape string `xorm:"varchar(2000)" json:"inputShape"` + InputDataFormat string `xorm:"NOT NULL" json:"inputDataFormat"` + Description string `xorm:"varchar(2000)" json:"description"` + Path string `xorm:"varchar(400) NOT NULL" json:"path"` + CreatedUnix timeutil.TimeStamp `xorm:"created" json:"createdUnix"` + UpdatedUnix timeutil.TimeStamp `xorm:"updated" json:"updatedUnix"` + StartTime timeutil.TimeStamp `json:"startTime"` + EndTime timeutil.TimeStamp `json:"endTime"` + UserName string `json:"userName"` + UserRelAvatarLink string `json:"userRelAvatarLink"` + IsCanOper bool `json:"isCanOper"` + IsCanDelete bool `json:"isCanDelete"` } type AiModelQueryOptions struct { diff --git a/models/attachment.go b/models/attachment.go index 55c6dbad3..2b5fa8efc 100755 --- a/models/attachment.go +++ b/models/attachment.go @@ -61,30 +61,6 @@ type AttachmentUsername struct { Name string } -type AttachmentInfo struct { - Attachment `xorm:"extends"` - Repo *Repository `xorm:"extends"` - RelAvatarLink string `xorm:"extends"` - UserName string `xorm:"extends"` - Recommend bool `xorm:"-"` -} - -type AttachmentsOptions struct { - ListOptions - DatasetIDs []int64 - DecompressState int - Type int - UploaderID int64 - NeedDatasetIDs bool - NeedIsPrivate bool - 
IsPrivate bool - JustNeedZipFile bool - NeedRepoInfo bool - Keyword string - RecommendOnly bool - UserId int64 -} - func (a *Attachment) AfterUpdate() { if a.DatasetID > 0 { datasetIsPublicCount, err := x.Where("dataset_id = ? AND is_private = ?", a.DatasetID, false).Count(new(Attachment)) @@ -493,19 +469,6 @@ func getPrivateAttachments(e Engine, userID int64) ([]*AttachmentUsername, error return attachments, nil } -func getAllUserAttachments(e Engine, userID int64) ([]*AttachmentUsername, error) { - attachments := make([]*AttachmentUsername, 0, 10) - if err := e.Table("attachment").Join("LEFT", "`user`", "attachment.uploader_id "+ - "= `user`.id").Where("decompress_state= ? and attachment.type = ? and (uploader_id= ? or is_private = ?)", DecompressStateDone, TypeCloudBrainOne, userID, false).Find(&attachments); err != nil { - return nil, err - } - return attachments, nil -} - -func GetAllUserAttachments(userID int64) ([]*AttachmentUsername, error) { - return getAllUserAttachments(x, userID) -} - func getModelArtsUserAttachments(e Engine, userID int64) ([]*AttachmentUsername, error) { attachments := make([]*AttachmentUsername, 0, 10) if err := e.Table("attachment").Join("LEFT", "`user`", "attachment.uploader_id "+ @@ -601,107 +564,6 @@ func GetAllAttachmentSize() (int64, error) { return x.SumInt(&Attachment{}, "size") } -func Attachments(opts *AttachmentsOptions) ([]*AttachmentInfo, int64, error) { - sess := x.NewSession() - defer sess.Close() - - var cond = builder.NewCond() - if opts.NeedDatasetIDs { - cond = cond.And( - builder.In("attachment.dataset_id", opts.DatasetIDs), - ) - } - - if opts.UploaderID > 0 { - cond = cond.And( - builder.Eq{"attachment.uploader_id": opts.UploaderID}, - ) - } - - if (opts.Type) >= 0 { - cond = cond.And( - builder.Eq{"attachment.type": opts.Type}, - ) - } - - if opts.NeedIsPrivate { - cond = cond.And( - builder.Eq{"attachment.is_private": opts.IsPrivate}, - ) - } - if opts.RecommendOnly { - cond = 
cond.And(builder.In("attachment.id", builder.Select("attachment.id"). - From("attachment"). - Join("INNER", "dataset", "attachment.dataset_id = dataset.id and dataset.recommend=true"))) - } - - if opts.JustNeedZipFile { - var DecompressState []int32 - DecompressState = append(DecompressState, DecompressStateDone, DecompressStateIng, DecompressStateFailed) - cond = cond.And( - builder.In("attachment.decompress_state", DecompressState), - ) - } - - var count int64 - var err error - if len(opts.Keyword) == 0 { - count, err = sess.Where(cond).Count(new(Attachment)) - } else { - lowerKeyWord := strings.ToLower(opts.Keyword) - - cond = cond.And(builder.Or(builder.Like{"LOWER(attachment.name)", lowerKeyWord}, builder.Like{"LOWER(attachment.description)", lowerKeyWord})) - count, err = sess.Table(&Attachment{}).Where(cond).Count(new(AttachmentInfo)) - - } - - if err != nil { - return nil, 0, fmt.Errorf("Count: %v", err) - } - - if opts.Page >= 0 && opts.PageSize > 0 { - var start int - if opts.Page == 0 { - start = 0 - } else { - start = (opts.Page - 1) * opts.PageSize - } - sess.Limit(opts.PageSize, start) - } - - sess.OrderBy("attachment.created_unix DESC") - attachments := make([]*AttachmentInfo, 0, setting.UI.DatasetPagingNum) - if err := sess.Table(&Attachment{}).Where(cond). 
- Find(&attachments); err != nil { - return nil, 0, fmt.Errorf("Find: %v", err) - } - - if opts.NeedRepoInfo { - for _, attachment := range attachments { - dataset, err := GetDatasetByID(attachment.DatasetID) - if err != nil { - return nil, 0, fmt.Errorf("GetDatasetByID failed error: %v", err) - } - attachment.Recommend = dataset.Recommend - repo, err := GetRepositoryByID(dataset.RepoID) - if err == nil { - attachment.Repo = repo - } else { - return nil, 0, fmt.Errorf("GetRepositoryByID failed error: %v", err) - } - user, err := GetUserByID(attachment.UploaderID) - if err == nil { - attachment.RelAvatarLink = user.RelAvatarLink() - attachment.UserName = user.Name - } else { - return nil, 0, fmt.Errorf("GetUserByID failed error: %v", err) - } - } - } - - return attachments, count, nil -} - func GetAllDatasetContributorByDatasetId(datasetId int64) ([]*User, error) { r := make([]*User, 0) if err := x.Select("distinct(public.user.*)").Table("attachment").Join("LEFT", "user", "public.user.ID = attachment.uploader_id").Where("attachment.dataset_id = ?", datasetId).Find(&r); err != nil { diff --git a/models/base_message.go b/models/base_message.go index 37f7668ad..f76e18175 100644 --- a/models/base_message.go +++ b/models/base_message.go @@ -14,3 +14,24 @@ func BaseErrorMessage(message string) BaseMessage { 1, message, } } + +type BaseMessageApi struct { + Code int `json:"code"` + Message string `json:"message"` +} + +var BaseOKMessageApi = BaseMessageApi{ + 0, "", +} + +func BaseErrorMessageApi(message string) BaseMessageApi { + return BaseMessageApi{ + 1, message, + } +} + +type BaseMessageWithDataApi struct { + Code int `json:"code"` + Message string `json:"message"` + Data interface{} `json:"data"` +} diff --git a/models/dataset.go b/models/dataset.go index 720850ed9..972503641 100755 --- a/models/dataset.go +++ b/models/dataset.go @@ -22,8 +22,8 @@ const ( type Dataset struct { ID int64 `xorm:"pk autoincr"` - Title string `xorm:"INDEX NOT NULL"` - Status int32 
`xorm:"INDEX"` // normal_private: 0, pulbic: 1, is_delete: 2 + Title string `xorm:"INDEX NOT NULL"` + Status int32 `xorm:"INDEX"` // normal_private: 0, public: 1, is_delete: 2 Category string Description string `xorm:"TEXT"` DownloadTimes int64 diff --git a/models/repo.go b/models/repo.go index 6009c776f..f0760108b 100755 --- a/models/repo.go +++ b/models/repo.go @@ -223,10 +223,10 @@ type Repository struct { BlockChainStatus RepoBlockChainStatus `xorm:"NOT NULL DEFAULT 0"` // git clone and git pull total count - CloneCnt int64 `xorm:"NOT NULL DEFAULT 0"` + CloneCnt int64 `xorm:"NOT NULL DEFAULT 0" json:"clone_cnt"` // only git clone total count - GitCloneCnt int64 `xorm:"NOT NULL DEFAULT 0"` + GitCloneCnt int64 `xorm:"NOT NULL DEFAULT 0" json:"git_clone_cnt"` CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 9744bc387..0221c51d8 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -57,29 +57,26 @@ type CreateModelArtsTrainJobForm struct { } type CreateModelArtsInferenceJobForm struct { - DisplayJobName string `form:"display_job_name" binding:"Required"` - JobName string `form:"job_name" binding:"Required"` - Attachment string `form:"attachment" binding:"Required"` - BootFile string `form:"boot_file" binding:"Required"` - WorkServerNumber int `form:"work_server_number" binding:"Required"` - EngineID int `form:"engine_id" binding:"Required"` - PoolID string `form:"pool_id" binding:"Required"` - Flavor string `form:"flavor" binding:"Required"` - Params string `form:"run_para_list" binding:"Required"` - Description string `form:"description"` - IsSaveParam string `form:"is_save_para"` - ParameterTemplateName string `form:"parameter_template_name"` - PrameterDescription string `form:"parameter_description"` - BranchName string `form:"branch_name" binding:"Required"` - VersionName string `form:"version_name" 
binding:"Required"` - FlavorName string `form:"flaver_names" binding:"Required"` - EngineName string `form:"engine_names" binding:"Required"` - LabelName string `form:"label_names" binding:"Required"` - TrainUrl string `form:"train_url" binding:"Required"` - ModelName string `form:"model_name" binding:"Required"` - ModelVersion string `form:"model_version" binding:"Required"` - CkptName string `form:"ckpt_name" binding:"Required"` - SpecId int64 `form:"spec_id" binding:"Required"` + DisplayJobName string `form:"display_job_name" binding:"Required"` + JobName string `form:"job_name" binding:"Required"` + Attachment string `form:"attachment" binding:"Required"` + BootFile string `form:"boot_file" binding:"Required"` + WorkServerNumber int `form:"work_server_number" binding:"Required"` + EngineID int `form:"engine_id" binding:"Required"` + PoolID string `form:"pool_id" binding:"Required"` + Flavor string `form:"flavor" binding:"Required"` + Params string `form:"run_para_list" binding:"Required"` + Description string `form:"description"` + BranchName string `form:"branch_name" binding:"Required"` + VersionName string `form:"version_name" binding:"Required"` + FlavorName string `form:"flaver_names" binding:"Required"` + EngineName string `form:"engine_names" binding:"Required"` + LabelName string `form:"label_names" binding:"Required"` + TrainUrl string `form:"train_url" binding:"Required"` + ModelName string `form:"model_name" binding:"Required"` + ModelVersion string `form:"model_version" binding:"Required"` + CkptName string `form:"ckpt_name" binding:"Required"` + SpecId int64 `form:"spec_id" binding:"Required"` } func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index e57bd8d7e..8d4e57670 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -228,7 +228,7 @@ func AdminOrImageCreaterRight(ctx 
*context.Context) { } -func GenerateTask(req GenerateCloudBrainTaskReq) error { +func GenerateTask(req GenerateCloudBrainTaskReq) (string, error) { var versionCount int if req.JobType == string(models.JobTypeTrain) { versionCount = 1 @@ -335,11 +335,11 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { }) if err != nil { log.Error("CreateJob failed:", err.Error(), req.Ctx.Data["MsgID"]) - return err + return "", err } if jobResult.Code != Success { log.Error("CreateJob(%s) failed:%s", req.JobName, jobResult.Msg, req.Ctx.Data["MsgID"]) - return errors.New(jobResult.Msg) + return "", errors.New(jobResult.Msg) } var jobID = jobResult.Payload["jobId"].(string) @@ -380,13 +380,13 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { }) if err != nil { - return err + return "", err } task, err := models.GetCloudbrainByJobID(jobID) if err != nil { log.Error("GetCloudbrainByJobID failed: %v", err.Error()) - return err + return "", err } stringId := strconv.FormatInt(task.ID, 10) @@ -401,7 +401,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugGPUTask) } - return nil + return jobID, nil } func IsBenchmarkJob(jobType string) bool { diff --git a/modules/convert/cloudbrain.go b/modules/convert/cloudbrain.go new file mode 100644 index 000000000..1487f468e --- /dev/null +++ b/modules/convert/cloudbrain.go @@ -0,0 +1,111 @@ +package convert + +import ( + "code.gitea.io/gitea/models" + api "code.gitea.io/gitea/modules/structs" +) + +func ToCloudBrain(task *models.Cloudbrain) *api.Cloudbrain { + return &api.Cloudbrain{ + ID: task.ID, + JobID: task.JobID, + JobType: task.JobType, + Type: task.Type, + DisplayJobName: task.DisplayJobName, + Status: task.Status, + CreatedUnix: int64(task.CreatedUnix), + RepoID: task.RepoID, + Duration: task.Duration, + TrainJobDuration: task.TrainJobDuration, + ImageID: task.ImageID, + Image: task.Image, + 
Uuid: task.Uuid, + DatasetName: task.DatasetName, + ComputeResource: task.ComputeResource, + AiCenter: task.AiCenter, + BranchName: task.BranchName, + Parameters: task.Parameters, + BootFile: task.BootFile, + Description: task.Description, + ModelName: task.ModelName, + + ModelVersion: task.ModelVersion, + CkptName: task.CkptName, + + StartTime: int64(task.StartTime), + EndTime: int64(task.EndTime), + + Spec: ToSpecification(task.Spec), + } +} +func ToAttachment(attachment *models.Attachment) *api.AttachmentShow { + return &api.AttachmentShow{ + ID: attachment.ID, + UUID: attachment.UUID, + DatasetID: attachment.DatasetID, + ReleaseID: attachment.ReleaseID, + UploaderID: attachment.UploaderID, + CommentID: attachment.CommentID, + Name: attachment.Name, + Description: attachment.Description, + DownloadCount: attachment.DownloadCount, + UseNumber: attachment.UseNumber, + Size: attachment.Size, + IsPrivate: attachment.IsPrivate, + DecompressState: attachment.DecompressState, + Type: attachment.Type, + CreatedUnix: int64(attachment.CreatedUnix), + } +} + +func ToDataset(dataset *models.Dataset) *api.Dataset { + var convertAttachments []*api.AttachmentShow + for _, attachment := range dataset.Attachments { + convertAttachments = append(convertAttachments, ToAttachment(attachment)) + } + return &api.Dataset{ + ID: dataset.ID, + Title: dataset.Title, + Status: dataset.Status, + Category: dataset.Category, + Description: dataset.Description, + DownloadTimes: dataset.DownloadTimes, + UseCount: dataset.UseCount, + NumStars: dataset.NumStars, + Recommend: dataset.Recommend, + License: dataset.License, + Task: dataset.Task, + ReleaseID: dataset.ReleaseID, + UserID: dataset.UserID, + RepoID: dataset.RepoID, + Repo: &api.RepositoryShow{ + OwnerName: dataset.Repo.OwnerName, + Name: dataset.Repo.Name, + }, + CreatedUnix: int64(dataset.CreatedUnix), + UpdatedUnix: int64(dataset.UpdatedUnix), + Attachments: convertAttachments, + } +} + +func ToSpecification(s *models.Specification) 
*api.SpecificationShow { + return &api.SpecificationShow{ + ID: s.ID, + AccCardsNum: s.AccCardsNum, + AccCardType: s.AccCardType, + CpuCores: s.CpuCores, + MemGiB: s.MemGiB, + GPUMemGiB: s.GPUMemGiB, + ShareMemGiB: s.ShareMemGiB, + ComputeResource: s.ComputeResource, + UnitPrice: s.UnitPrice, + } +} + +func ToTagger(user *models.User) *api.Tagger { + return &api.Tagger{ + Name: user.Name, + RelAvatarURL: user.RelAvatarLink(), + Email: user.Email, + } +} diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 35ea815b5..b6f62560a 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -102,7 +102,7 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram return datasetGrampus } -func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { +func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() centerID, centerName := getCentersParamter(ctx, req) @@ -150,7 +150,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error }) if err != nil { log.Error("createJob failed: %v", err.Error()) - return err + return "", err } jobID := jobResult.JobInfo.JobID @@ -191,7 +191,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error()) - return err + return "", err } var actionType models.ActionType @@ -202,7 +202,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error } notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType) - return nil + return jobID, nil } func getCentersParamter(ctx *context.Context, req *GenerateTrainJobReq) ([]string, []string) { diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 567f6d620..dd502dfd0 100755 --- a/modules/modelarts/modelarts.go 
+++ b/modules/modelarts/modelarts.go @@ -350,7 +350,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc return nil } -func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { +func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() var jobResult *models.CreateTrainJobResult var createErr error @@ -410,17 +410,17 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error }) if errTemp != nil { log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) - return errTemp + return "", errTemp } } - return createErr + return "", createErr } - jobId := strconv.FormatInt(jobResult.JobID, 10) + jobID := strconv.FormatInt(jobResult.JobID, 10) createErr = models.CreateCloudbrain(&models.Cloudbrain{ Status: TransTrainJobStatus(jobResult.Status), UserID: ctx.User.ID, RepoID: ctx.Repo.Repository.ID, - JobID: jobId, + JobID: jobID, JobName: req.JobName, DisplayJobName: req.DisplayJobName, JobType: string(models.JobTypeTrain), @@ -458,10 +458,10 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error if createErr != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, createErr.Error()) - return createErr + return "", createErr } - notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask) - return nil + notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateTrainTask) + return jobID, nil } func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) { @@ -682,7 +682,7 @@ func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) { return VersionOutputPath } -func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) { +func GenerateInferenceJob(ctx *context.Context, req 
*GenerateInferenceJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() var jobResult *models.CreateTrainJobResult var createErr error @@ -742,10 +742,10 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e }) if err != nil { log.Error("InsertCloudbrainTemp failed: %v", err.Error()) - return err + return "", err } } - return err + return "", err } // attach, err := models.GetAttachmentByUUID(req.Uuid) @@ -796,7 +796,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) - return err + return "", err } if req.JobType == string(models.JobTypeModelSafety) { task, err := models.GetCloudbrainByJobID(jobID) @@ -807,7 +807,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateInferenceTask) } - return nil + return jobID, nil } func GetNotebookImageName(imageId string) (string, error) { diff --git a/modules/structs/attachment.go b/modules/structs/attachment.go index 0f3c2ed6a..4a0f8feec 100755 --- a/modules/structs/attachment.go +++ b/modules/structs/attachment.go @@ -27,3 +27,48 @@ type Attachment struct { type EditAttachmentOptions struct { Name string `json:"name"` } + +type Dataset struct { + ID int64 `json:"id"` + Title string `json:"title"` + Status int32 `json:"status"` + Category string `json:"category"` + Description string `json:"description"` + DownloadTimes int64 `json:"downloadTimes"` + UseCount int64 `json:"useCount"` + NumStars int `json:"numStars"` + Recommend bool `json:"recommend"` + License string `json:"license"` + Task string `json:"task"` + ReleaseID int64 `json:"releaseId"` + UserID int64 `json:"userId"` + RepoID int64 `json:"repoId"` + Repo *RepositoryShow `json:"repo"` + CreatedUnix int64 `json:"createdUnix"` + UpdatedUnix int64 
`json:"updatedUnix"` + + Attachments []*AttachmentShow `json:"attachments"` +} + +type RepositoryShow struct { + OwnerName string `json:"ownerName"` + Name string `json:"name"` +} + +type AttachmentShow struct { + ID int64 `json:"id"` + UUID string `json:"uuid"` + DatasetID int64 `json:"datasetId"` + ReleaseID int64 `json:"releaseId"` + UploaderID int64 `json:"uploaderId"` + CommentID int64 `json:"commentId"` + Name string `json:"name"` + Description string `json:"description"` + DownloadCount int64 `json:"downloadCount"` + UseNumber int64 `json:"useNumber"` + Size int64 `json:"size"` + IsPrivate bool `json:"isPrivate"` + DecompressState int32 `json:"decompressState"` + Type int `json:"type"` + CreatedUnix int64 `json:"createdUnix"` +} diff --git a/modules/structs/cloudbrain.go b/modules/structs/cloudbrain.go new file mode 100644 index 000000000..866c85dad --- /dev/null +++ b/modules/structs/cloudbrain.go @@ -0,0 +1,84 @@ +package structs + +type CreateGrampusTrainJobOption struct { + DisplayJobName string `json:"display_job_name" binding:"Required"` + JobName string `json:"job_name" binding:"Required" ` + Attachment string `json:"attachment" binding:"Required"` + BootFile string `json:"boot_file" binding:"Required"` + ImageID string `json:"image_id" binding:"Required"` + Params string `json:"run_para_list" binding:"Required"` + Description string `json:"description"` + BranchName string `json:"branch_name" binding:"Required"` + EngineName string `json:"engine_name" binding:"Required"` + WorkServerNumber int `json:"work_server_number" binding:"Required"` + Image string `json:"image" binding:"Required"` + DatasetName string `json:"dataset_name" binding:"Required"` + ModelName string `json:"model_name"` + ModelVersion string `json:"model_version"` + CkptName string `json:"ckpt_name"` + LabelName string `json:"label_names"` + PreTrainModelUrl string `json:"pre_train_model_url"` + SpecId int64 `json:"spec_id" binding:"Required"` +} + +type CreateTrainJobOption struct { 
+ Type int `json:"type"` + DisplayJobName string `json:"display_job_name" binding:"Required"` + ImageID string `json:"image_id"` + Image string `json:"image" binding:"Required"` + Attachment string `json:"attachment" binding:"Required"` + DatasetName string `json:"dataset_name" binding:"Required"` + Description string `json:"description" ` + BootFile string `json:"boot_file" binding:"Required"` + BranchName string `json:"branch_name" binding:"Required"` + Params string `json:"run_para_list" binding:"Required"` + WorkServerNumber int `json:"work_server_number"` + ModelName string `json:"model_name"` + ModelVersion string `json:"model_version"` + CkptName string `json:"ckpt_name"` + LabelName string `json:"label_names"` + PreTrainModelUrl string `json:"pre_train_model_url"` + SpecId int64 `json:"spec_id" binding:"Required"` +} + +type Cloudbrain struct { + ID int64 `json:"id"` + JobID string `json:"job_id"` + JobType string `json:"job_type"` + Type int `json:"type"` + DisplayJobName string `json:"display_job_name"` + Status string `json:"status"` + CreatedUnix int64 `json:"created_unix"` + RepoID int64 `json:"repo_id"` + Duration int64 `json:"duration"` //运行时长 单位秒 + TrainJobDuration string `json:"train_job_duration"` + ImageID string `json:"image_id"` //grampus image_id + Image string `json:"image"` + Uuid string `json:"uuid"` //数据集id + DatasetName string `json:"dataset_name"` + ComputeResource string `json:"compute_resource"` //计算资源,例如npu + AiCenter string `json:"ai_center"` //grampus ai center: center_id+center_name + BranchName string `json:"branch_name"` //分支名称 + Parameters string `json:"parameters"` //传给modelarts的param参数 + BootFile string `json:"boot_file"` //启动文件 + Description string `json:"description"` //描述 + ModelName string `json:"model_name"` //模型名称 + ModelVersion string `json:"model_version"` //模型版本 + CkptName string `json:"ckpt_name"` //权重文件名称 + StartTime int64 `json:"start_time"` + EndTime int64 `json:"end_time"` + + Spec *SpecificationShow 
`json:"spec"` +} + +type SpecificationShow struct { + ID int64 `json:"id"` + AccCardsNum int `json:"acc_cards_num"` + AccCardType string `json:"acc_card_type"` + CpuCores int `json:"cpu_cores"` + MemGiB float32 `json:"mem_gi_b"` + GPUMemGiB float32 `json:"gpu_mem_gi_b"` + ShareMemGiB float32 `json:"share_mem_gi_b"` + ComputeResource string `json:"compute_resource"` + UnitPrice int `json:"unit_price"` +} diff --git a/modules/structs/tagger.go b/modules/structs/tagger.go new file mode 100644 index 000000000..8933c8c5c --- /dev/null +++ b/modules/structs/tagger.go @@ -0,0 +1,7 @@ +package structs + +type Tagger struct { + Name string `json:"name"` + Email string `json:"email"` + RelAvatarURL string `json:"relAvatarURL"` +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 0be49aa04..e8e8722e2 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -617,6 +617,7 @@ organization = Organizations uid = Uid u2f = Security Keys bind_wechat = Bind WeChat +no_wechat_bind = Can not do the operation, please bind WeChat first. wechat_bind = WeChat Binding bind_account_information = Bind account information bind_time = Bind Time diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 8acb3d9fa..d13c99443 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -622,6 +622,7 @@ organization=组织 uid=用户 ID u2f=安全密钥 wechat_bind = 微信绑定 +no_wechat_bind = 不能创建任务,请先绑定微信。 bind_wechat = 绑定微信 bind_account_information = 绑定账号信息 bind_time = 绑定时间 diff --git a/routers/admin/resources.go b/routers/admin/resources.go index 1d3870a14..20638553b 100644 --- a/routers/admin/resources.go +++ b/routers/admin/resources.go @@ -182,7 +182,7 @@ func UpdateResourceSpecification(ctx *context.Context, req models.ResourceSpecif if err != nil { log.Error("UpdateResourceSpecification error. 
%v", err) - ctx.JSON(http.StatusOK, response.ResponseError(err)) + ctx.JSON(http.StatusOK, response.ResponseBizError(err)) return } ctx.JSON(http.StatusOK, response.Success()) diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 69de79c10..813b77c8c 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -242,6 +242,15 @@ func reqRepoWriter(unitTypes ...models.UnitType) macaron.Handler { } } +func reqWeChat() macaron.Handler { + return func(ctx *context.Context) { + if setting.WechatAuthSwitch && ctx.User.WechatOpenId == "" { + ctx.JSON(http.StatusForbidden, models.BaseErrorMessageApi(ctx.Tr("settings.no_wechat_bind"))) + return + } + } +} + // reqRepoReader user should have specific read permission or be a repo admin or a site admin func reqRepoReader(unitType models.UnitType) macaron.Handler { return func(ctx *context.Context) { @@ -517,6 +526,25 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/markdown", bind(api.MarkdownOption{}), misc.Markdown) m.Post("/markdown/raw", misc.MarkdownRaw) + m.Group("/images", func() { + + m.Get("/public", repo.GetPublicImages) + m.Get("/custom", repo.GetCustomImages) + m.Get("/star", repo.GetStarImages) + m.Get("/npu", repo.GetNpuImages) + + }, reqToken()) + + m.Group("/attachments", func() { + + m.Get("/:uuid", repo.GetAttachment) + m.Get("/get_chunks", repo.GetSuccessChunks) + m.Get("/new_multipart", repo.NewMultipart) + m.Get("/get_multipart_url", repo.GetMultipartUploadUrl) + m.Post("/complete_multipart", repo.CompleteMultipart) + + }, reqToken()) + // Notifications m.Group("/notifications", func() { m.Combo("").
@@ -701,6 +729,13 @@ func RegisterRoutes(m *macaron.Macaron) { m.Combo("/repositories/:id", reqToken()).Get(repo.GetByID) + m.Group("/datasets/:username/:reponame", func() { + m.Get("/current_repo", repo.CurrentRepoDatasetMultiple) + m.Get("/my_datasets", repo.MyDatasetsMultiple) + m.Get("/public_datasets", repo.PublicDatasetMultiple) + m.Get("/my_favorite", repo.MyFavoriteDatasetMultiple) + }, reqToken(), repoAssignment()) + m.Group("/repos", func() { m.Get("/search", repo.Search) @@ -709,7 +744,13 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/migrate", reqToken(), bind(auth.MigrateRepoForm{}), repo.Migrate) m.Post("/migrate/submit", reqToken(), bind(auth.MigrateRepoForm{}), repo.MigrateSubmit) + m.Group("/specification", func() { + m.Get("", repo.GetResourceSpec) + }, reqToken()) + m.Group("/:username/:reponame", func() { + m.Get("/right", reqToken(), repo.GetRight) + m.Get("/tagger", reqToken(), repo.ListTagger) m.Combo("").Get(reqAnyRepoReader(), repo.Get). Delete(reqToken(), reqOwner(), repo.Delete). 
Patch(reqToken(), reqAdmin(), bind(api.EditRepoOption{}), context.RepoRef(), repo.Edit) @@ -938,21 +979,39 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/:id/log", repo.CloudbrainGetLog) m.Get("/:id/download_log_file", repo.CloudbrainDownloadLogFile) m.Group("/train-job", func() { + + m.Post("/create", reqToken(), reqRepoWriter(models.UnitTypeCloudBrain), reqWeChat(), context.ReferencesGitRepo(false), bind(api.CreateTrainJobOption{}), repo.CreateCloudBrain) + m.Group("/:jobid", func() { m.Get("", repo.GetModelArtsTrainJobVersion) + m.Get("/detail", reqToken(), reqRepoReader(models.UnitTypeCloudBrain), repo.CloudBrainShow) m.Get("/model_list", repo.CloudBrainModelList) m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.CloudBrainStop) }) }) m.Group("/inference-job", func() { + m.Post("/create", reqToken(), reqRepoWriter(models.UnitTypeCloudBrain), reqWeChat(), bind(api.CreateTrainJobOption{}), context.ReferencesGitRepo(false), repo.CreateCloudBrainInferenceTask) + m.Group("/:jobid", func() { m.Get("", repo.GetCloudBrainInferenceJob) + m.Get("/detail", reqToken(), reqRepoReader(models.UnitTypeCloudBrain), repo.CloudBrainShow) + m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.DelCloudBrainJob) m.Get("/result_list", repo.InferencJobResultList) }) }) }, reqRepoReader(models.UnitTypeCloudBrain)) m.Group("/modelmanage", func() { + m.Post("/create_new_model", repo.CreateNewModel) + m.Get("/show_model_api", repo.ShowModelManageApi) + m.Delete("/delete_model", repo.DeleteModel) + m.Get("/downloadall", repo.DownloadModel) + m.Get("/query_model_byId", repo.QueryModelById) + m.Get("/query_model_for_predict", repo.QueryModelListForPredict) + m.Get("/query_modelfile_for_predict", repo.QueryModelFileForPredict) + m.Get("/query_train_model", repo.QueryTrainModelList) + m.Post("/create_model_convert", repo.CreateModelConvert) + m.Get("/show_model_convert_page", repo.ShowModelConvertPage) m.Get("/:id", repo.GetCloudbrainModelConvertTask) 
m.Get("/:id/log", repo.CloudbrainForModelConvertGetLog) m.Get("/:id/modelartlog", repo.TrainJobForModelConvertGetLog) diff --git a/routers/api/v1/repo/attachments.go b/routers/api/v1/repo/attachments.go new file mode 100644 index 000000000..cb36ba2ee --- /dev/null +++ b/routers/api/v1/repo/attachments.go @@ -0,0 +1,25 @@ +package repo + +import ( + "code.gitea.io/gitea/modules/context" + routeRepo "code.gitea.io/gitea/routers/repo" +) + +func GetSuccessChunks(ctx *context.APIContext) { + routeRepo.GetSuccessChunks(ctx.Context) +} + +func NewMultipart(ctx *context.APIContext) { + routeRepo.NewMultipart(ctx.Context) +} +func GetMultipartUploadUrl(ctx *context.APIContext) { + routeRepo.GetMultipartUploadUrl(ctx.Context) +} + +func CompleteMultipart(ctx *context.APIContext) { + routeRepo.CompleteMultipart(ctx.Context) + +} +func GetAttachment(ctx *context.APIContext) { + routeRepo.GetAttachment(ctx.Context) +} diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go index 3c120cb54..2e25fdefe 100755 --- a/routers/api/v1/repo/cloudbrain.go +++ b/routers/api/v1/repo/cloudbrain.go @@ -16,8 +16,14 @@ import ( "strings" "time" + cloudbrainService "code.gitea.io/gitea/services/cloudbrain" + + "code.gitea.io/gitea/modules/convert" + "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" + api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/setting" @@ -31,6 +37,77 @@ import ( routerRepo "code.gitea.io/gitea/routers/repo" ) +func CloudBrainShow(ctx *context.APIContext) { + + task, err := models.GetCloudbrainByJobID(ctx.Params(":jobid")) + + if err != nil { + log.Info("error:" + err.Error()) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("repo.cloudbrain_query_fail")) + return + } + cloudbrainTask.PrepareSpec4Show(task) + task.ContainerIp = "" + if cloudbrainTask.IsTaskNotStop(task) { + cloudbrainTask.SyncTaskStatus(task) + } + + if task.TrainJobDuration == "" { + if task.Duration 
== 0 { + var duration int64 + if task.Status == string(models.JobWaiting) { + duration = 0 + } else if task.Status == string(models.JobRunning) { + duration = time.Now().Unix() - int64(task.CreatedUnix) + } else { + duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) + } + task.Duration = duration + } + task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) + } + //to unify image output + if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter { + task.ImageID = strconv.FormatInt(task.EngineID, 10) + task.Image = task.EngineName + + } else if task.Type == models.TypeC2Net { + task.Image = task.EngineName + } + task.AiCenter = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx.Context) + + ctx.JSON(http.StatusOK, models.BaseMessageWithDataApi{Code: 0, Message: "", Data: convert.ToCloudBrain(task)}) + +} + +func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) { + if option.Type == cloudbrainTask.TaskTypeCloudbrainOne { + cloudbrainTask.CloudbrainOneTrainJobCreate(ctx.Context, option) + } + if option.Type == cloudbrainTask.TaskTypeModelArts { + cloudbrainTask.ModelArtsTrainJobNpuCreate(ctx.Context, option) + } + + if option.Type == cloudbrainTask.TaskTypeGrampusGPU { + cloudbrainTask.GrampusTrainJobGpuCreate(ctx.Context, option) + } + if option.Type == cloudbrainTask.TaskTypeGrampusNPU { + cloudbrainTask.GrampusTrainJobNpuCreate(ctx.Context, option) + } + +} + +func CreateCloudBrainInferenceTask(ctx *context.APIContext, option api.CreateTrainJobOption) { + + if option.Type == 0 { + cloudbrainTask.CloudBrainInferenceJobCreate(ctx.Context, option) + } + if option.Type == 1 { + cloudbrainTask.ModelArtsInferenceJobCreate(ctx.Context, option) + } + +} + // cloudbrain get job task by jobid func GetCloudbrainTask(ctx *context.APIContext) { // swagger:operation GET /repos/{owner}/{repo}/cloudbrain/{jobid} cloudbrain jobTask diff --git a/routers/api/v1/repo/datasets.go b/routers/api/v1/repo/datasets.go new 
file mode 100644 index 000000000..0223cf740 --- /dev/null +++ b/routers/api/v1/repo/datasets.go @@ -0,0 +1,123 @@ +package repo + +import ( + "fmt" + "strings" + + "code.gitea.io/gitea/modules/convert" + + api "code.gitea.io/gitea/modules/structs" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" +) + +func PublicDatasetMultiple(ctx *context.APIContext) { + + opts := &models.SearchDatasetOptions{ + PublicOnly: true, + NeedAttachment: true, + CloudBrainType: ctx.QueryInt("type"), + } + datasetMultiple(ctx, opts) + +} + +func MyFavoriteDatasetMultiple(ctx *context.APIContext) { + + opts := &models.SearchDatasetOptions{ + StarByMe: true, + DatasetIDs: models.GetDatasetIdsStarByUser(ctx.User.ID), + NeedAttachment: true, + CloudBrainType: ctx.QueryInt("type"), + } + datasetMultiple(ctx, opts) +} + +func CurrentRepoDatasetMultiple(ctx *context.APIContext) { + datasetIds := models.GetDatasetIdsByRepoID(ctx.Repo.Repository.ID) + searchOrderBy := getSearchOrderByInValues(datasetIds) + opts := &models.SearchDatasetOptions{ + RepoID: ctx.Repo.Repository.ID, + NeedAttachment: true, + CloudBrainType: ctx.QueryInt("type"), + DatasetIDs: datasetIds, + SearchOrderBy: searchOrderBy, + } + + datasetMultiple(ctx, opts) + +} + +func MyDatasetsMultiple(ctx *context.APIContext) { + + opts := &models.SearchDatasetOptions{ + UploadAttachmentByMe: true, + NeedAttachment: true, + CloudBrainType: ctx.QueryInt("type"), + } + datasetMultiple(ctx, opts) + +} +func datasetMultiple(ctx *context.APIContext, opts *models.SearchDatasetOptions) { + page := ctx.QueryInt("page") + if page < 1 { + page = 1 + } + pageSize := ctx.QueryInt("pageSize") + if pageSize < 1 { + pageSize = setting.UI.DatasetPagingNum + } + + keyword := strings.Trim(ctx.Query("q"), " ") + opts.Keyword = keyword + if opts.SearchOrderBy.String() == "" { + opts.SearchOrderBy = models.SearchOrderByRecentUpdated + } + + 
opts.RecommendOnly = ctx.QueryBool("recommend") + opts.ListOptions = models.ListOptions{ + Page: page, + PageSize: pageSize, + } + opts.JustNeedZipFile = true + opts.User = ctx.User + + datasets, count, err := models.SearchDataset(opts) + + if err != nil { + log.Error("json.Marshal failed:", err.Error()) + ctx.JSON(200, map[string]interface{}{ + "code": 1, + "message": err.Error(), + "data": []*api.Dataset{}, + "count": 0, + }) + return + } + var convertDatasets []*api.Dataset + for _, dataset := range datasets { + convertDatasets = append(convertDatasets, convert.ToDataset(dataset)) + } + + ctx.JSON(200, map[string]interface{}{ + "code": 0, + "message": "", + "data": convertDatasets, + "count": count, + }) +} + +func getSearchOrderByInValues(datasetIds []int64) models.SearchOrderBy { + if len(datasetIds) == 0 { + return "" + } + searchOrderBy := "CASE id " + for i, id := range datasetIds { + searchOrderBy += fmt.Sprintf(" WHEN %d THEN %d", id, i+1) + } + searchOrderBy += " ELSE 0 END" + return models.SearchOrderBy(searchOrderBy) +} diff --git a/routers/api/v1/repo/images.go b/routers/api/v1/repo/images.go new file mode 100644 index 000000000..f0cb62980 --- /dev/null +++ b/routers/api/v1/repo/images.go @@ -0,0 +1,141 @@ +package repo + +import ( + "encoding/json" + "net/http" + "strconv" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/grampus" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/modelarts" + "code.gitea.io/gitea/modules/setting" +) + +type NPUImageINFO struct { + ID string `json:"id"` + Value string `json:"value"` +} + +func GetPublicImages(ctx *context.APIContext) { + uid := getUID(ctx) + opts := models.SearchImageOptions{ + IncludePublicOnly: true, + UID: uid, + Keyword: ctx.Query("q"), + Topics: ctx.Query("topic"), + IncludeOfficialOnly: ctx.QueryBool("recommend"), + SearchOrderBy: "type desc, num_stars desc,id desc", + Status: models.IMAGE_STATUS_SUCCESS, + CloudbrainType: 
ctx.QueryInt("cloudbrainType"), + } + + getImages(ctx, &opts) + +} + +func GetCustomImages(ctx *context.APIContext) { + uid := getUID(ctx) + opts := models.SearchImageOptions{ + UID: uid, + IncludeOwnerOnly: true, + Keyword: ctx.Query("q"), + Topics: ctx.Query("topic"), + Status: -1, + SearchOrderBy: "id desc", + } + getImages(ctx, &opts) + +} +func GetStarImages(ctx *context.APIContext) { + + uid := getUID(ctx) + opts := models.SearchImageOptions{ + UID: uid, + IncludeStarByMe: true, + Keyword: ctx.Query("q"), + Topics: ctx.Query("topic"), + Status: models.IMAGE_STATUS_SUCCESS, + SearchOrderBy: "id desc", + } + getImages(ctx, &opts) + +} + +func GetNpuImages(ctx *context.APIContext) { + cloudbrainType := ctx.QueryInt("type") + if cloudbrainType == 0 { //modelarts + getModelArtsImages(ctx) + } else { //c2net + getC2netNpuImages(ctx) + } +} + +func getModelArtsImages(ctx *context.APIContext) { + + var versionInfos modelarts.VersionInfo + _ = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos) + var npuImageInfos []NPUImageINFO + for _, info := range versionInfos.Version { + npuImageInfos = append(npuImageInfos, NPUImageINFO{ + ID: strconv.Itoa(info.ID), + Value: info.Value, + }) + } + ctx.JSON(http.StatusOK, npuImageInfos) + +} + +func getC2netNpuImages(ctx *context.APIContext) { + images, err := grampus.GetImages(grampus.ProcessorTypeNPU) + var npuImageInfos []NPUImageINFO + if err != nil { + log.Error("GetImages failed:", err.Error()) + ctx.JSON(http.StatusOK, []NPUImageINFO{}) + } else { + for _, info := range images.Infos { + npuImageInfos = append(npuImageInfos, NPUImageINFO{ + ID: info.ID, + Value: info.Name, + }) + } + ctx.JSON(http.StatusOK, npuImageInfos) + } +} +func getImages(ctx *context.APIContext, opts *models.SearchImageOptions) { + page := ctx.QueryInt("page") + if page <= 0 { + page = 1 + } + + pageSize := ctx.QueryInt("pageSize") + if pageSize <= 0 { + pageSize = 15 + } + opts.ListOptions = models.ListOptions{ + Page: page, + PageSize: 
pageSize, + } + imageList, total, err := models.SearchImage(opts) + if err != nil { + log.Error("Can not get images:%v", err) + ctx.JSON(http.StatusOK, models.ImagesPageResult{ + Count: 0, + Images: []*models.Image{}, + }) + } else { + ctx.JSON(http.StatusOK, models.ImagesPageResult{ + Count: total, + Images: imageList, + }) + } +} + +func getUID(ctx *context.APIContext) int64 { + var uid int64 = -1 + if ctx.IsSigned { + uid = ctx.User.ID + } + return uid +} diff --git a/routers/api/v1/repo/mlops.go b/routers/api/v1/repo/mlops.go new file mode 100644 index 000000000..43969330d --- /dev/null +++ b/routers/api/v1/repo/mlops.go @@ -0,0 +1,71 @@ +package repo + +import ( + "net/http" + + "code.gitea.io/gitea/models" + + "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/convert" + "code.gitea.io/gitea/modules/log" + api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/routers/api/v1/utils" +) + +//标注任务可分配人员 +func ListTagger(ctx *context.APIContext) { + + taggers := make([]*api.Tagger, 0) + userRemember := make(map[string]string) + collaborators, err := ctx.Repo.Repository.GetCollaborators(utils.GetListOptions(ctx)) + if err != nil { + log.Warn("ListCollaborators", err) + ctx.JSON(http.StatusOK, taggers) + return + } + for _, collaborator := range collaborators { + taggers = append(taggers, convert.ToTagger(collaborator.User)) + userRemember[collaborator.User.Name] = "" + } + + teams, err := ctx.Repo.Repository.GetRepoTeams() + if err != nil { + log.Warn("ListTeams", err) + ctx.JSON(http.StatusOK, taggers) + return + } + + for _, team := range teams { + team.GetMembers(&models.SearchMembersOptions{}) + for _, user := range team.Members { + if _, ok := userRemember[user.Name]; !ok { + taggers = append(taggers, convert.ToTagger(user)) + userRemember[user.Name] = "" + } + } + } + if !ctx.Repo.Owner.IsOrganization() { + if _, ok := userRemember[ctx.Repo.Owner.Name]; !ok { + taggers = append(taggers, convert.ToTagger(ctx.Repo.Owner)) + + } + } + 
ctx.JSON(http.StatusOK, taggers) + +} +func GetRight(ctx *context.APIContext) { + right := "none" + + if ctx.IsUserRepoReaderSpecific(models.UnitTypeCode) { + right = "read" + } + + if ctx.IsUserRepoWriter([]models.UnitType{models.UnitTypeCode}) || ctx.IsUserRepoAdmin() { + right = "write" + } + + ctx.JSON(http.StatusOK, map[string]string{ + "right": right, + }) + +} diff --git a/routers/api/v1/repo/modelmanage.go b/routers/api/v1/repo/modelmanage.go new file mode 100644 index 000000000..2c1fd9f01 --- /dev/null +++ b/routers/api/v1/repo/modelmanage.go @@ -0,0 +1,106 @@ +package repo + +import ( + "net/http" + + "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/storage" + routerRepo "code.gitea.io/gitea/routers/repo" +) + +type FileInfo struct { + FileName string `json:"fileName"` + ModTime string `json:"modTime"` + IsDir bool `json:"isDir"` + Size int64 `json:"size"` + ParenDir string `json:"parenDir"` + UUID string `json:"uuid"` +} + +func CreateNewModel(ctx *context.APIContext) { + log.Info("CreateNewModel by api.") + routerRepo.SaveModel(ctx.Context) +} + +func ShowModelManageApi(ctx *context.APIContext) { + log.Info("ShowModelManageApi by api.") + routerRepo.ShowModelPageInfo(ctx.Context) +} + +func DeleteModel(ctx *context.APIContext) { + log.Info("DeleteModel by api.") + routerRepo.DeleteModel(ctx.Context) +} + +func DownloadModel(ctx *context.APIContext) { + log.Info("DownloadModel by api.") + routerRepo.DownloadMultiModelFile(ctx.Context) +} + +func QueryModelById(ctx *context.APIContext) { + log.Info("QueryModelById by api.") + routerRepo.QueryModelById(ctx.Context) +} + +func QueryModelListForPredict(ctx *context.APIContext) { + log.Info("QueryModelListForPredict by api.") + routerRepo.QueryModelListForPredict(ctx.Context) +} + +func QueryTrainModelList(ctx *context.APIContext) { + result, err := routerRepo.QueryTrainModelFileById(ctx.Context) + if err != nil { + log.Info("query error." 
+ err.Error()) + } + re := convertFileFormat(result) + ctx.JSON(http.StatusOK, re) +} + +func convertFileFormat(result []storage.FileInfo) []FileInfo { + re := make([]FileInfo, 0) + if result != nil { + for _, file := range result { + tmpFile := FileInfo{ + FileName: file.FileName, + ModTime: file.ModTime, + IsDir: file.IsDir, + Size: file.Size, + ParenDir: file.ParenDir, + UUID: file.UUID, + } + re = append(re, tmpFile) + } + } + return re +} + +func QueryModelFileForPredict(ctx *context.APIContext) { + log.Info("QueryModelFileForPredict by api.") + id := ctx.Query("id") + result := routerRepo.QueryModelFileByID(id) + re := convertFileFormat(result) + ctx.JSON(http.StatusOK, re) +} + +func CreateModelConvert(ctx *context.APIContext) { + log.Info("CreateModelConvert by api.") + routerRepo.SaveModelConvert(ctx.Context) +} + +func ShowModelConvertPage(ctx *context.APIContext) { + log.Info("ShowModelConvertPage by api.") + modelResult, count, err := routerRepo.GetModelConvertPageData(ctx.Context) + if err == nil { + mapInterface := make(map[string]interface{}) + mapInterface["data"] = modelResult + mapInterface["count"] = count + ctx.JSON(http.StatusOK, mapInterface) + } else { + mapInterface := make(map[string]interface{}) + mapInterface["data"] = nil + mapInterface["count"] = 0 + ctx.JSON(http.StatusOK, mapInterface) + } + +} diff --git a/routers/api/v1/repo/spec.go b/routers/api/v1/repo/spec.go new file mode 100644 index 000000000..a8aa28d98 --- /dev/null +++ b/routers/api/v1/repo/spec.go @@ -0,0 +1,36 @@ +package repo + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/routers/response" + "code.gitea.io/gitea/services/cloudbrain/resource" +) + +func GetResourceSpec(ctx *context.APIContext) { + jobType := ctx.Query("jobType") + computeResource := ctx.Query("compute") + cluster := ctx.Query("cluster") + aiCenterCode := ctx.Query("center") + if jobType == "" || computeResource 
== "" || cluster == "" { + log.Info("GetResourceSpec api.param error") + ctx.JSON(200, response.OuterBizError(response.PARAM_ERROR)) + return + } + specs, err := resource.FindAvailableSpecs4Show(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobType(jobType), + ComputeResource: computeResource, + Cluster: cluster, + AiCenterCode: aiCenterCode, + }) + if err != nil { + log.Error("GetResourceSpec api error. %v", err) + ctx.JSON(200, response.OuterServerError(err.Error())) + return + } + + specMap := make(map[string]interface{}, 0) + specMap["specs"] = specs + ctx.JSON(200, response.OuterSuccessWithData(specMap)) +} diff --git a/routers/repo/ai_model_convert.go b/routers/repo/ai_model_convert.go index bd6a01072..560ace8fd 100644 --- a/routers/repo/ai_model_convert.go +++ b/routers/repo/ai_model_convert.go @@ -74,27 +74,27 @@ func SaveModelConvert(ctx *context.Context) { log.Info("save model convert start.") if !ctx.Repo.CanWrite(models.UnitTypeModelManage) { ctx.JSON(200, map[string]string{ - "result_code": "1", - "message": ctx.Tr("repo.modelconvert.manage.no_operate_right"), + "code": "1", + "msg": ctx.Tr("repo.modelconvert.manage.no_operate_right"), }) return } name := ctx.Query("name") desc := ctx.Query("desc") modelId := ctx.Query("modelId") - modelPath := ctx.Query("ModelFile") - SrcEngine := ctx.QueryInt("SrcEngine") + modelPath := ctx.Query("modelFile") + SrcEngine := ctx.QueryInt("srcEngine") InputShape := ctx.Query("inputshape") InputDataFormat := ctx.Query("inputdataformat") - DestFormat := ctx.QueryInt("DestFormat") - NetOutputFormat := ctx.QueryInt("NetOutputFormat") + DestFormat := ctx.QueryInt("destFormat") + NetOutputFormat := ctx.QueryInt("netOutputFormat") task, err := models.QueryModelById(modelId) if err != nil { log.Error("no such model!", err.Error()) ctx.JSON(200, map[string]string{ - "result_code": "1", - "message": ctx.Tr("repo.modelconvert.manage.model_not_exist"), + "code": "1", + "msg": 
ctx.Tr("repo.modelconvert.manage.model_not_exist"), }) return } @@ -105,8 +105,8 @@ func SaveModelConvert(ctx *context.Context) { if convert.Name == name { log.Info("convert.Name=" + name + " convert.id=" + convert.ID) ctx.JSON(200, map[string]string{ - "result_code": "1", - "message": ctx.Tr("repo.modelconvert.manage.create_error1"), + "code": "1", + "msg": ctx.Tr("repo.modelconvert.manage.create_error1"), }) return } @@ -119,8 +119,8 @@ func SaveModelConvert(ctx *context.Context) { if isRunningTask(convert.Status) { log.Info("convert.Status=" + convert.Status + " convert.id=" + convert.ID) ctx.JSON(200, map[string]string{ - "result_code": "1", - "message": ctx.Tr("repo.modelconvert.manage.create_error2"), + "code": "1", + "msg": ctx.Tr("repo.modelconvert.manage.create_error2"), }) return } @@ -150,7 +150,7 @@ func SaveModelConvert(ctx *context.Context) { go goCreateTask(modelConvert, ctx, task) ctx.JSON(200, map[string]string{ - "result_code": "0", + "code": "0", }) } @@ -604,11 +604,11 @@ func StopModelConvert(ctx *context.Context) { } func ShowModelConvertInfo(ctx *context.Context) { - ctx.Data["ID"] = ctx.Query("ID") + ctx.Data["ID"] = ctx.Query("id") ctx.Data["isModelManage"] = true ctx.Data["ModelManageAccess"] = ctx.Repo.CanWrite(models.UnitTypeModelManage) - job, err := models.QueryModelConvertById(ctx.Query("ID")) + job, err := models.QueryModelConvertById(ctx.Query("id")) if err == nil { if job.TrainJobDuration == "" { job.TrainJobDuration = "00:00:00" @@ -715,6 +715,26 @@ func ShowModelConvertPageInfo(ctx *context.Context) { if pageSize <= 0 { pageSize = setting.UI.IssuePagingNum } + modelResult, count, err := GetModelConvertPageData(ctx) + if err == nil { + pager := context.NewPagination(int(count), page, pageSize, 5) + ctx.Data["Page"] = pager + ctx.Data["Tasks"] = modelResult + ctx.Data["MODEL_CONVERT_COUNT"] = count + } else { + ctx.ServerError("Query data error.", err) + } +} + +func GetModelConvertPageData(ctx *context.Context) 
([]*models.AiModelConvert, int64, error) { + page := ctx.QueryInt("page") + if page <= 0 { + page = 1 + } + pageSize := ctx.QueryInt("pageSize") + if pageSize <= 0 { + pageSize = setting.UI.IssuePagingNum + } repoId := ctx.Repo.Repository.ID modelResult, count, err := models.QueryModelConvert(&models.AiModelQueryOptions{ ListOptions: models.ListOptions{ @@ -725,10 +745,8 @@ func ShowModelConvertPageInfo(ctx *context.Context) { }) if err != nil { log.Info("query db error." + err.Error()) - ctx.ServerError("Cloudbrain", err) - return + return nil, 0, err } - ctx.Data["MODEL_CONVERT_COUNT"] = count userIds := make([]int64, len(modelResult)) for i, model := range modelResult { model.IsCanOper = isOper(ctx, model.UserId) @@ -743,10 +761,7 @@ func ShowModelConvertPageInfo(ctx *context.Context) { model.UserRelAvatarLink = value.RelAvatarLink() } } - pager := context.NewPagination(int(count), page, pageSize, 5) - ctx.Data["Page"] = pager - ctx.Data["Tasks"] = modelResult - + return modelResult, count, nil } func ModelConvertDownloadModel(ctx *context.Context) { @@ -757,7 +772,7 @@ func ModelConvertDownloadModel(ctx *context.Context) { ctx.ServerError("Not found task.", err) return } - AllDownload := ctx.QueryBool("AllDownload") + AllDownload := ctx.QueryBool("allDownload") if AllDownload { if job.IsGpuTrainTask() { path := setting.CBCodePathPrefix + job.ID + "/model/" diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index f2b0fc6d1..1bef11703 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -34,13 +34,13 @@ const ( STATUS_ERROR = 2 ) -func saveModelByParameters(jobId string, versionName string, name string, version string, label string, description string, engine int, ctx *context.Context) error { +func saveModelByParameters(jobId string, versionName string, name string, version string, label string, description string, engine int, ctx *context.Context) (string, error) { aiTask, err := 
models.GetCloudbrainByJobIDAndVersionName(jobId, versionName) if err != nil { aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, jobId) if err != nil { log.Info("query task error." + err.Error()) - return err + return "", err } else { log.Info("query gpu train task.") } @@ -56,7 +56,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio if len(aimodels) > 0 { for _, model := range aimodels { if model.Version == version { - return errors.New(ctx.Tr("repo.model.manage.create_error")) + return "", errors.New(ctx.Tr("repo.model.manage.create_error")) } if model.New == MODEL_LATEST { lastNewModelId = model.ID @@ -111,7 +111,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio err = models.SaveModelToDb(model) if err != nil { - return err + return "", err } if len(lastNewModelId) > 0 { //udpate status and version count @@ -134,7 +134,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio log.Info("save model end.") notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, id, name, models.ActionCreateNewModelTask) - return nil + return id, nil } func asyncToCopyModel(aiTask *models.Cloudbrain, id string, modelSelectedFile string) { @@ -173,7 +173,7 @@ func SaveNewNameModel(ctx *context.Context) { ctx.Error(403, ctx.Tr("repo.model_noright")) return } - name := ctx.Query("Name") + name := ctx.Query("name") if name == "" { ctx.Error(500, fmt.Sprintf("name or version is null.")) return @@ -195,38 +195,42 @@ func SaveModel(ctx *context.Context) { return } log.Info("save model start.") - JobId := ctx.Query("JobId") - VersionName := ctx.Query("VersionName") - name := ctx.Query("Name") - version := ctx.Query("Version") - label := ctx.Query("Label") - description := ctx.Query("Description") - engine := ctx.QueryInt("Engine") + JobId := ctx.Query("jobId") + VersionName := ctx.Query("versionName") + name := ctx.Query("name") + version := ctx.Query("version") + label 
:= ctx.Query("label") + description := ctx.Query("description") + engine := ctx.QueryInt("engine") modelSelectedFile := ctx.Query("modelSelectedFile") log.Info("engine=" + fmt.Sprint(engine) + " modelSelectedFile=" + modelSelectedFile) - + re := map[string]string{ + "code": "-1", + } if JobId == "" || VersionName == "" { - ctx.Error(500, fmt.Sprintf("JobId or VersionName is null.")) + re["msg"] = "JobId or VersionName is null." + ctx.JSON(200, re) return } if modelSelectedFile == "" { - ctx.Error(500, fmt.Sprintf("Not selected model file.")) + re["msg"] = "Not selected model file." + ctx.JSON(200, re) return } - if name == "" || version == "" { - ctx.Error(500, fmt.Sprintf("name or version is null.")) + re["msg"] = "name or version is null." + ctx.JSON(200, re) return } - - err := saveModelByParameters(JobId, VersionName, name, version, label, description, engine, ctx) - + id, err := saveModelByParameters(JobId, VersionName, name, version, label, description, engine, ctx) if err != nil { log.Info("save model error." + err.Error()) - ctx.Error(500, fmt.Sprintf("save model error. 
%v", err)) - return + re["msg"] = err.Error() + } else { + re["code"] = "0" + re["id"] = id } - ctx.Status(200) + ctx.JSON(200, re) log.Info("save model end.") } @@ -291,13 +295,17 @@ func downloadModelFromCloudBrainOne(modelUUID string, jobName string, parentDir func DeleteModel(ctx *context.Context) { log.Info("delete model start.") - id := ctx.Query("ID") + id := ctx.Query("id") err := deleteModelByID(ctx, id) if err != nil { - ctx.JSON(500, err.Error()) + re := map[string]string{ + "code": "-1", + } + re["msg"] = err.Error() + ctx.JSON(200, re) } else { ctx.JSON(200, map[string]string{ - "result_code": "0", + "code": "0", }) } } @@ -354,7 +362,7 @@ func QueryModelByParameters(repoId int64, page int) ([]*models.AiModelManage, in func DownloadMultiModelFile(ctx *context.Context) { log.Info("DownloadMultiModelFile start.") - id := ctx.Query("ID") + id := ctx.Query("id") log.Info("id=" + id) task, err := models.QueryModelById(id) if err != nil { @@ -487,7 +495,10 @@ func downloadFromCloudBrainTwo(path string, task *models.AiModelManage, ctx *con func QueryTrainJobVersionList(ctx *context.Context) { log.Info("query train job version list. start.") - JobID := ctx.Query("JobID") + JobID := ctx.Query("jobId") + if JobID == "" { + JobID = ctx.Query("JobId") + } VersionListTasks, count, err := models.QueryModelTrainJobVersionList(JobID) @@ -515,20 +526,33 @@ func QueryTrainJobList(ctx *context.Context) { } -func QueryTrainModelList(ctx *context.Context) { - log.Info("query train job list. 
start.") - jobName := ctx.Query("jobName") - taskType := ctx.QueryInt("type") - VersionName := ctx.Query("VersionName") +func QueryTrainModelFileById(ctx *context.Context) ([]storage.FileInfo, error) { + JobID := ctx.Query("jobId") + VersionListTasks, count, err := models.QueryModelTrainJobVersionList(JobID) + if err == nil { + if count == 1 { + task := VersionListTasks[0] + jobName := task.JobName + taskType := task.Type + VersionName := task.VersionName + modelDbResult, err := getModelFromObjectSave(jobName, taskType, VersionName) + return modelDbResult, err + } + } + log.Info("get TypeCloudBrainTwo TrainJobListModel failed:", err) + return nil, errors.New("Not found task.") +} + +func getModelFromObjectSave(jobName string, taskType int, VersionName string) ([]storage.FileInfo, error) { if taskType == models.TypeCloudBrainTwo { objectkey := path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, VersionName) + "/" modelDbResult, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, objectkey) log.Info("bucket=" + setting.Bucket + " objectkey=" + objectkey) if err != nil { log.Info("get TypeCloudBrainTwo TrainJobListModel failed:", err) + return nil, err } else { - ctx.JSON(200, modelDbResult) - return + return modelDbResult, nil } } else if taskType == models.TypeCloudBrainOne { modelSrcPrefix := setting.CBCodePathPrefix + jobName + "/model/" @@ -536,12 +560,30 @@ func QueryTrainModelList(ctx *context.Context) { modelDbResult, err := storage.GetAllObjectByBucketAndPrefixMinio(bucketName, modelSrcPrefix) if err != nil { log.Info("get TypeCloudBrainOne TrainJobListModel failed:", err) + return nil, err } else { - ctx.JSON(200, modelDbResult) - return + return modelDbResult, nil } } - ctx.JSON(200, "") + return nil, errors.New("Not support.") +} + +func QueryTrainModelList(ctx *context.Context) { + log.Info("query train job list. 
start.") + jobName := ctx.Query("jobName") + taskType := ctx.QueryInt("type") + VersionName := ctx.Query("versionName") + if VersionName == "" { + VersionName = ctx.Query("VersionName") + } + modelDbResult, err := getModelFromObjectSave(jobName, taskType, VersionName) + if err != nil { + log.Info("get TypeCloudBrainTwo TrainJobListModel failed:", err) + ctx.JSON(200, "") + } else { + ctx.JSON(200, modelDbResult) + return + } } func DownloadSingleModelFile(ctx *context.Context) { @@ -612,7 +654,7 @@ func DownloadSingleModelFile(ctx *context.Context) { } func ShowModelInfo(ctx *context.Context) { - ctx.Data["ID"] = ctx.Query("ID") + ctx.Data["ID"] = ctx.Query("id") ctx.Data["name"] = ctx.Query("name") ctx.Data["isModelManage"] = true ctx.Data["ModelManageAccess"] = ctx.Repo.CanWrite(models.UnitTypeModelManage) @@ -620,6 +662,19 @@ func ShowModelInfo(ctx *context.Context) { ctx.HTML(200, tplModelInfo) } +func QueryModelById(ctx *context.Context) { + id := ctx.Query("id") + model, err := models.QueryModelById(id) + if err == nil { + model.IsCanOper = isOper(ctx, model.UserId) + model.IsCanDelete = isCanDelete(ctx, model.UserId) + removeIpInfo(model) + ctx.JSON(http.StatusOK, model) + } else { + ctx.JSON(http.StatusNotFound, nil) + } +} + func ShowSingleModel(ctx *context.Context) { name := ctx.Query("name") @@ -828,8 +883,8 @@ func ModifyModel(id string, description string) error { func ModifyModelInfo(ctx *context.Context) { log.Info("modify model start.") - id := ctx.Query("ID") - description := ctx.Query("Description") + id := ctx.Query("id") + description := ctx.Query("description") task, err := models.QueryModelById(id) if err != nil { @@ -894,28 +949,36 @@ func QueryModelListForPredict(ctx *context.Context) { } func QueryModelFileForPredict(ctx *context.Context) { - id := ctx.Query("ID") + id := ctx.Query("id") + if id == "" { + id = ctx.Query("ID") + } + ctx.JSON(http.StatusOK, QueryModelFileByID(id)) +} + +func QueryModelFileByID(id string) []storage.FileInfo { 
model, err := models.QueryModelById(id) if err == nil { if model.Type == models.TypeCloudBrainTwo { prefix := model.Path[len(setting.Bucket)+1:] fileinfos, _ := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, prefix) - ctx.JSON(http.StatusOK, fileinfos) + return fileinfos } else if model.Type == models.TypeCloudBrainOne { prefix := model.Path[len(setting.Attachment.Minio.Bucket)+1:] fileinfos, _ := storage.GetAllObjectByBucketAndPrefixMinio(setting.Attachment.Minio.Bucket, prefix) - ctx.JSON(http.StatusOK, fileinfos) + return fileinfos } } else { log.Error("no such model!", err.Error()) - ctx.ServerError("no such model:", err) - return } - + return nil } func QueryOneLevelModelFile(ctx *context.Context) { - id := ctx.Query("ID") + id := ctx.Query("id") + if id == "" { + id = ctx.Query("ID") + } parentDir := ctx.Query("parentDir") model, err := models.QueryModelById(id) if err != nil { diff --git a/routers/repo/aisafety.go b/routers/repo/aisafety.go index e274f808e..b638a486b 100644 --- a/routers/repo/aisafety.go +++ b/routers/repo/aisafety.go @@ -804,7 +804,7 @@ func createForNPU(ctx *context.Context, jobName string) error { JobType: string(models.JobTypeModelSafety), } - err = modelarts.GenerateInferenceJob(ctx, req) + _, err = modelarts.GenerateInferenceJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) return err @@ -901,7 +901,7 @@ func createForGPU(ctx *context.Context, jobName string) error { LabelName: evaluationIndex, } - err = cloudbrain.GenerateTask(req) + _, err = cloudbrain.GenerateTask(req) if err != nil { return err } diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index aac941101..61c2925b8 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -398,7 +398,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } - err = cloudbrain.GenerateTask(req) + _, err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx, jobType) 
ctx.RenderWithErr(err.Error(), tpl, &form) @@ -584,7 +584,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra Spec: spec, } - err = cloudbrain.GenerateTask(req) + _, err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tpl, &form) @@ -2487,7 +2487,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo Spec: spec, } - err = cloudbrain.GenerateTask(req) + _, err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, &form) @@ -2641,7 +2641,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) Spec: spec, } - err = cloudbrain.GenerateTask(req) + _, err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tpl, &form) diff --git a/routers/repo/dataset.go b/routers/repo/dataset.go index 16e21e43b..1f445492d 100755 --- a/routers/repo/dataset.go +++ b/routers/repo/dataset.go @@ -47,8 +47,8 @@ func newFilterPrivateAttachments(ctx *context.Context, list []*models.Attachment permission := false if !permission && ctx.User != nil { isCollaborator, _ := repo.IsCollaborator(ctx.User.ID) - isInRepoTeam,_:=repo.IsInRepoTeam(ctx.User.ID) - if isCollaborator ||isInRepoTeam { + isInRepoTeam, _ := repo.IsInRepoTeam(ctx.User.ID) + if isCollaborator || isInRepoTeam { log.Info("Collaborator user may visit the attach.") permission = true } @@ -349,96 +349,6 @@ func DatasetAction(ctx *context.Context) { } -func CurrentRepoDataset(ctx *context.Context) { - page := ctx.QueryInt("page") - cloudbrainType := ctx.QueryInt("type") - keyword := strings.Trim(ctx.Query("q"), " ") - - repo := ctx.Repo.Repository - var datasetIDs []int64 - dataset, err := models.GetDatasetByRepo(repo) - if err != nil { - ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("GetDatasetByRepo failed", err))) 
- return - } - datasetIDs = append(datasetIDs, dataset.ID) - datasets, count, err := models.Attachments(&models.AttachmentsOptions{ - ListOptions: models.ListOptions{ - Page: page, - PageSize: setting.UI.DatasetPagingNum, - }, - Keyword: keyword, - NeedDatasetIDs: true, - DatasetIDs: datasetIDs, - Type: cloudbrainType, - NeedIsPrivate: false, - JustNeedZipFile: true, - NeedRepoInfo: true, - }) - if err != nil { - ctx.ServerError("datasets", err) - return - } - - data, err := json.Marshal(datasets) - if err != nil { - log.Error("json.Marshal failed:", err.Error()) - ctx.JSON(200, map[string]string{ - "result_code": "-1", - "error_msg": err.Error(), - "data": "", - }) - return - } - ctx.JSON(200, map[string]string{ - "result_code": "0", - "data": string(data), - "count": strconv.FormatInt(count, 10), - }) -} - -func MyDatasets(ctx *context.Context) { - page := ctx.QueryInt("page") - cloudbrainType := ctx.QueryInt("type") - keyword := strings.Trim(ctx.Query("q"), " ") - - uploaderID := ctx.User.ID - datasets, count, err := models.Attachments(&models.AttachmentsOptions{ - ListOptions: models.ListOptions{ - Page: page, - PageSize: setting.UI.DatasetPagingNum, - }, - Keyword: keyword, - NeedDatasetIDs: false, - UploaderID: uploaderID, - Type: cloudbrainType, - NeedIsPrivate: false, - JustNeedZipFile: true, - NeedRepoInfo: true, - RecommendOnly: ctx.QueryBool("recommend"), - }) - if err != nil { - ctx.ServerError("datasets", err) - return - } - - data, err := json.Marshal(datasets) - if err != nil { - log.Error("json.Marshal failed:", err.Error()) - ctx.JSON(200, map[string]string{ - "result_code": "-1", - "error_msg": err.Error(), - "data": "", - }) - return - } - ctx.JSON(200, map[string]string{ - "result_code": "0", - "data": string(data), - "count": strconv.FormatInt(count, 10), - }) -} - func datasetMultiple(ctx *context.Context, opts *models.SearchDatasetOptions) { page := ctx.QueryInt("page") keyword := strings.Trim(ctx.Query("q"), " ") @@ -593,180 +503,6 @@ func 
ReferenceDatasetData(ctx *context.Context) { } -func PublicDataset(ctx *context.Context) { - page := ctx.QueryInt("page") - cloudbrainType := ctx.QueryInt("type") - keyword := strings.Trim(ctx.Query("q"), " ") - - datasets, count, err := models.Attachments(&models.AttachmentsOptions{ - ListOptions: models.ListOptions{ - Page: page, - PageSize: setting.UI.DatasetPagingNum, - }, - Keyword: keyword, - NeedDatasetIDs: false, - NeedIsPrivate: true, - IsPrivate: false, - Type: cloudbrainType, - JustNeedZipFile: true, - NeedRepoInfo: true, - RecommendOnly: ctx.QueryBool("recommend"), - }) - if err != nil { - ctx.ServerError("datasets", err) - return - } - - data, err := json.Marshal(datasets) - if err != nil { - log.Error("json.Marshal failed:", err.Error()) - ctx.JSON(200, map[string]string{ - "result_code": "-1", - "error_msg": err.Error(), - "data": "", - }) - return - } - ctx.JSON(200, map[string]string{ - "result_code": "0", - "data": string(data), - "count": strconv.FormatInt(count, 10), - }) -} - -func MyFavoriteDataset(ctx *context.Context) { - UserId := ctx.User.ID - cloudbrainType := ctx.QueryInt("type") - keyword := strings.Trim(ctx.Query("q"), " ") - var NotColDatasetIDs []int64 - var IsColDatasetIDs []int64 - datasetStars, err := models.GetDatasetStarByUser(ctx.User) - if err != nil { - ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("GetDatasetStarByUser failed", err))) - log.Error("GetDatasetStarByUser failed:", err.Error()) - ctx.JSON(200, map[string]string{ - "result_code": "-1", - "error_msg": err.Error(), - "data": "", - }) - return - } - //If the dataset has been deleted, it will not be counted - for _, datasetStar := range datasetStars { - IsExist, repo, dataset, err := IsDatasetStarExist(datasetStar) - if err != nil { - log.Error("IsDatasetStarExist error:", err.Error()) - } - if IsExist { - DatasetIsCollaborator := DatasetIsCollaborator(ctx, dataset) - if repo.OwnerID == ctx.User.ID || DatasetIsCollaborator { - IsColDatasetIDs = 
append(IsColDatasetIDs, datasetStar.DatasetID) - } else { - NotColDatasetIDs = append(NotColDatasetIDs, datasetStar.DatasetID) - } - } - } - - NotColDatasets, NotColcount, err := models.Attachments(&models.AttachmentsOptions{ - Keyword: keyword, - NeedDatasetIDs: true, - DatasetIDs: NotColDatasetIDs, - NeedIsPrivate: true, - IsPrivate: false, - Type: cloudbrainType, - JustNeedZipFile: true, - NeedRepoInfo: true, - RecommendOnly: ctx.QueryBool("recommend"), - UserId: UserId, - }) - if err != nil { - ctx.ServerError("datasets", err) - return - } - //If is collaborator, there is no need to determine whether the dataset is private or public - IsColDatasets, IsColcount, err := models.Attachments(&models.AttachmentsOptions{ - Keyword: keyword, - NeedDatasetIDs: true, - DatasetIDs: IsColDatasetIDs, - NeedIsPrivate: false, - Type: cloudbrainType, - JustNeedZipFile: true, - NeedRepoInfo: true, - RecommendOnly: ctx.QueryBool("recommend"), - UserId: UserId, - }) - if err != nil { - ctx.ServerError("datasets", err) - return - } - for _, NotColDataset := range NotColDatasets { - IsColDatasets = append(IsColDatasets, NotColDataset) - } - datasets := IsColDatasets - count := NotColcount + IsColcount - sort.Slice(datasets, func(i, j int) bool { - return datasets[i].Attachment.CreatedUnix > datasets[j].Attachment.CreatedUnix - }) - - page := ctx.QueryInt("page") - if page <= 0 { - page = 1 - } - pagesize := ctx.QueryInt("pagesize") - if pagesize <= 0 { - pagesize = 5 - } - pageDatasetsInfo := getPageDatasets(datasets, page, pagesize) - if pageDatasetsInfo == nil { - ctx.JSON(200, map[string]string{ - "result_code": "0", - "data": "[]", - "count": strconv.FormatInt(count, 10), - }) - return - } - data, err := json.Marshal(pageDatasetsInfo) - log.Info("data:", data) - if err != nil { - log.Error("json.Marshal failed:", err.Error()) - ctx.JSON(200, map[string]string{ - "result_code": "-1", - "error_msg": err.Error(), - "data": "", - }) - return - } - ctx.JSON(200, map[string]string{ - 
"result_code": "0", - "data": string(data), - "count": strconv.FormatInt(count, 10), - }) - -} -func getPageDatasets(AttachmentInfos []*models.AttachmentInfo, page int, pagesize int) []*models.AttachmentInfo { - begin := (page - 1) * pagesize - end := (page) * pagesize - - if begin > len(AttachmentInfos)-1 { - return nil - } - if end > len(AttachmentInfos)-1 { - return AttachmentInfos[begin:] - } else { - return AttachmentInfos[begin:end] - } - -} -func getTotalPage(total int64, pageSize int) int { - - another := 0 - if int(total)%pageSize != 0 { - another = 1 - } - return int(total)/pageSize + another - -} - func GetDatasetStatus(ctx *context.Context) { var ( @@ -791,55 +527,3 @@ func GetDatasetStatus(ctx *context.Context) { "AttachmentStatus": fmt.Sprint(attachment.DecompressState), }) } -func DatasetIsCollaborator(ctx *context.Context, dataset *models.Dataset) bool { - repo, err := models.GetRepositoryByID(dataset.RepoID) - if err != nil { - log.Error("query repo error:", err.Error()) - } else { - repo.GetOwner() - if ctx.User != nil { - if repo.Owner.IsOrganization() { - org := repo.Owner - org.Teams, err = org.GetUserTeams(ctx.User.ID) - if err != nil { - log.Error("GetUserTeams error:", err.Error()) - return false - } - if org.IsUserPartOfOrg(ctx.User.ID) { - for _, t := range org.Teams { - if t.IsMember(ctx.User.ID) && t.HasRepository(repo.ID) { - return true - } - } - isOwner, _ := models.IsOrganizationOwner(repo.OwnerID, ctx.User.ID) - if isOwner { - return isOwner - } - return false - } - } - - isCollaborator, _ := repo.IsCollaborator(ctx.User.ID) - if isCollaborator { - return true - } - } - } - - return false -} -func IsDatasetStarExist(datasetStar *models.DatasetStar) (bool, *models.Repository, *models.Dataset, error) { - dataset, err := models.GetDatasetByID(datasetStar.DatasetID) - if err != nil { - log.Error("query dataset error:", err.Error()) - return false, nil, nil, err - } else { - repo, err := models.GetRepositoryByID(dataset.RepoID) - if err 
!= nil { - log.Error("GetRepositoryByID error:", err.Error()) - return false, nil, nil, err - } - return true, repo, dataset, nil - } - -} diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 4718fe04f..0620350f6 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -474,7 +474,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain } - err = grampus.GenerateTrainJob(ctx, req) + _, err = grampus.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) @@ -509,28 +509,6 @@ func GrampusTrainJobVersionCreate(ctx *context.Context, form auth.CreateGrampusT } -func checkSpecialPool(ctx *context.Context, resourceType string) string { - grampus.InitSpecialPool() - if grampus.SpecialPools != nil { - for _, pool := range grampus.SpecialPools.Pools { - - if pool.IsExclusive && pool.Type == resourceType { - - org, _ := models.GetOrgByName(pool.Org) - if org != nil { - isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) - if !isOrgMember { - return ctx.Tr("repo.grampus.no_operate_right") - } - } - } - - } - - } - return "" -} - func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) { ctx.Data["IsCreate"] = true grampusTrainJobNpuCreate(ctx, form) @@ -733,7 +711,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain req.PreTrainModelPath = preTrainModelPath } - err = grampus.GenerateTrainJob(ctx, req) + _, err = grampus.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) @@ -1003,7 +981,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo if processorType == grampus.ProcessorTypeNPU { //no need to process } else if processorType == grampus.ProcessorTypeGPU { - unZipDatasetCommand := 
generateDatasetUnzipCommand(datasetName) + unZipDatasetCommand := cloudbrainTask.GenerateDatasetUnzipCommand(datasetName) commandUnzip := "cd " + workDir + "code;unzip -q master.zip;rm -f master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand command += commandUnzip } @@ -1077,31 +1055,6 @@ func processPretrainModelParameter(pretrainModelPath string, pretrainModelFileNa return commandDownloadTemp } -func generateDatasetUnzipCommand(datasetName string) string { - var unZipDatasetCommand string - - datasetNameArray := strings.Split(datasetName, ";") - if len(datasetNameArray) == 1 { //单数据集 - unZipDatasetCommand = "unzip -q '" + datasetName + "';" - if strings.HasSuffix(datasetNameArray[0], ".tar.gz") { - unZipDatasetCommand = "tar --strip-components=1 -zxvf '" + datasetName + "';" - } - unZipDatasetCommand += "rm -f '" + datasetName + "';" - - } else { //多数据集 - for _, datasetNameTemp := range datasetNameArray { - if strings.HasSuffix(datasetNameTemp, ".tar.gz") { - unZipDatasetCommand = unZipDatasetCommand + "tar -zxvf '" + datasetNameTemp + "';" - } else { - unZipDatasetCommand = unZipDatasetCommand + "unzip -q '" + datasetNameTemp + "' -d './" + strings.TrimSuffix(datasetNameTemp, ".zip") + "';" - } - unZipDatasetCommand += "rm -f '" + datasetNameTemp + "';" - } - - } - return unZipDatasetCommand -} - func downloadZipCode(ctx *context.Context, codePath, branchName string) error { archiveType := git.ZIP archivePath := codePath diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index be59b0f3f..fabf7e555 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1230,7 +1230,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } - err = modelarts.GenerateTrainJob(ctx, req) + _, err = modelarts.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) trainJobNewDataPrepare(ctx) @@ -2205,7 +2205,7 @@ func 
InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference req.UserCommand = userCommand req.UserImageUrl = userImageUrl - err = modelarts.GenerateInferenceJob(ctx, req) + _, err = modelarts.GenerateInferenceJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) inferenceJobErrorNewDataPrepare(ctx, form) diff --git a/routers/response/api_response.go b/routers/response/api_response.go new file mode 100644 index 000000000..5cc6ff78e --- /dev/null +++ b/routers/response/api_response.go @@ -0,0 +1,30 @@ +package response + +type AiforgeOuterResponse struct { + Code int `json:"code"` + Msg string `json:"msg"` + Data interface{} `json:"data"` +} + +func OuterSuccess() *AiforgeOuterResponse { + return &AiforgeOuterResponse{Code: RESPONSE_CODE_SUCCESS, Msg: RESPONSE_MSG_SUCCESS} +} + +func OuterError(code int, msg string) *AiforgeOuterResponse { + return &AiforgeOuterResponse{Code: code, Msg: msg} +} + +func OuterServerError(msg string) *AiforgeOuterResponse { + return &AiforgeOuterResponse{Code: RESPONSE_CODE_ERROR_DEFAULT, Msg: msg} +} + +func OuterBizError(err *BizError) *AiforgeOuterResponse { + return &AiforgeOuterResponse{Code: err.Code, Msg: err.Err} +} + +func OuterSuccessWithData(data interface{}) *AiforgeOuterResponse { + return &AiforgeOuterResponse{Code: RESPONSE_CODE_SUCCESS, Msg: RESPONSE_MSG_SUCCESS, Data: data} +} +func OuterErrorWithData(code int, msg string, data interface{}) *AiforgeOuterResponse { + return &AiforgeOuterResponse{Code: code, Msg: msg, Data: data} +} diff --git a/routers/response/response.go b/routers/response/response.go index ccd6be445..ff654e5dc 100644 --- a/routers/response/response.go +++ b/routers/response/response.go @@ -24,10 +24,14 @@ func ServerError(msg string) *AiforgeResponse { return &AiforgeResponse{Code: RESPONSE_CODE_ERROR_DEFAULT, Msg: msg} } -func ResponseError(err *BizError) *AiforgeResponse { +func ResponseBizError(err *BizError) *AiforgeResponse { return 
&AiforgeResponse{Code: err.Code, Msg: err.Err} } +func ResponseError(err error) *AiforgeResponse { + return &AiforgeResponse{Code: RESPONSE_CODE_ERROR_DEFAULT, Msg: err.Error()} +} + func SuccessWithData(data interface{}) *AiforgeResponse { return &AiforgeResponse{Code: RESPONSE_CODE_SUCCESS, Msg: RESPONSE_MSG_SUCCESS, Data: data} } diff --git a/routers/response/response_list.go b/routers/response/response_list.go index bc44e9d68..2fb9effb3 100644 --- a/routers/response/response_list.go +++ b/routers/response/response_list.go @@ -1,6 +1,7 @@ package response -//repo response +var PARAM_ERROR = &BizError{Code: 9001, Err: "param error"} + var RESOURCE_QUEUE_NOT_AVAILABLE = &BizError{Code: 1001, Err: "resource queue not available"} var SPECIFICATION_NOT_EXIST = &BizError{Code: 1002, Err: "specification not exist"} var SPECIFICATION_NOT_AVAILABLE = &BizError{Code: 1003, Err: "specification not available"} @@ -11,4 +12,3 @@ var BADGES_STILL_HAS_USERS = &BizError{Code: 1005, Err: "Please delete users of //common response var SYSTEM_ERROR = &BizError{Code: 9009, Err: "System error.Please try again later"} var INSUFFICIENT_PERMISSION = &BizError{Code: 9003, Err: "insufficient permissions"} -var PARAM_ERROR = &BizError{Code: 9001, Err: "param error permissions"} diff --git a/routers/routes/routes.go b/routers/routes/routes.go index fd8b274e6..322f746f4 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1127,10 +1127,6 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/edit/:id", reqRepoDatasetWriter, repo.EditDataset) m.Post("/reference_datasets", reqRepoDatasetWriterJson, bindIgnErr(auth.ReferenceDatasetForm{}), repo.ReferenceDatasetPost) m.Post("/edit", reqRepoDatasetWriter, bindIgnErr(auth.EditDatasetForm{}), repo.EditDatasetPost) - m.Get("/current_repo", repo.CurrentRepoDataset) - m.Get("/my_datasets", repo.MyDatasets) - m.Get("/public_datasets", repo.PublicDataset) - m.Get("/my_favorite", repo.MyFavoriteDataset) m.Get("/current_repo_m", 
repo.CurrentRepoDatasetMultiple) m.Get("/my_datasets_m", repo.MyDatasetsMultiple) diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index a9b254618..985706911 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -14,28 +14,28 @@ type StatusInfo struct { ComputeResource string } -var cloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} -var cloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} -var grampusTwoNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} +var CloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} +var CloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} +var GrampusNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { CloudBrainTypes: []int{models.TypeCloudBrainOne}, JobType: []models.JobType{models.JobTypeDebug}, - NotFinalStatuses: cloudbrainOneNotFinalStatuses, + NotFinalStatuses: CloudbrainOneNotFinalStatuses, 
ComputeResource: models.GPUResource, }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { CloudBrainTypes: []int{models.TypeCloudBrainOne}, JobType: []models.JobType{models.JobTypeTrain}, - NotFinalStatuses: cloudbrainOneNotFinalStatuses, + NotFinalStatuses: CloudbrainOneNotFinalStatuses, ComputeResource: models.GPUResource, }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { CloudBrainTypes: []int{models.TypeCloudBrainOne}, JobType: []models.JobType{models.JobTypeInference}, - NotFinalStatuses: cloudbrainOneNotFinalStatuses, + NotFinalStatuses: CloudbrainOneNotFinalStatuses, ComputeResource: models.GPUResource, }, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { CloudBrainTypes: []int{models.TypeCloudBrainOne}, JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeBrainScore, models.JobTypeSnn4imagenet}, - NotFinalStatuses: cloudbrainOneNotFinalStatuses, + NotFinalStatuses: CloudbrainOneNotFinalStatuses, ComputeResource: models.GPUResource, }, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { CloudBrainTypes: []int{models.TypeCloudBrainTwo, models.TypeCDCenter}, @@ -45,22 +45,22 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { CloudBrainTypes: []int{models.TypeCloudBrainTwo}, JobType: []models.JobType{models.JobTypeTrain}, - NotFinalStatuses: cloudbrainTwoNotFinalStatuses, + NotFinalStatuses: CloudbrainTwoNotFinalStatuses, ComputeResource: models.NPUResource, }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { CloudBrainTypes: []int{models.TypeCloudBrainTwo}, JobType: []models.JobType{models.JobTypeInference}, - NotFinalStatuses: cloudbrainTwoNotFinalStatuses, + NotFinalStatuses: CloudbrainTwoNotFinalStatuses, ComputeResource: models.NPUResource, }, string(models.JobTypeTrain) + "-" + 
strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: { CloudBrainTypes: []int{models.TypeC2Net}, JobType: []models.JobType{models.JobTypeTrain}, - NotFinalStatuses: grampusTwoNotFinalStatuses, + NotFinalStatuses: GrampusNotFinalStatuses, ComputeResource: models.GPUResource, }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: { CloudBrainTypes: []int{models.TypeC2Net}, JobType: []models.JobType{models.JobTypeTrain}, - NotFinalStatuses: grampusTwoNotFinalStatuses, + NotFinalStatuses: GrampusNotFinalStatuses, ComputeResource: models.NPUResource, }} @@ -71,7 +71,7 @@ func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, c } key := jobNewType + "-" + strconv.Itoa(cloudbrainType) - if len(computeResource) > 0 { + if len(computeResource) > 0 && cloudbrainType == models.TypeC2Net { key = key + "-" + computeResource[0] } diff --git a/services/cloudbrain/cloudbrainTask/inference.go b/services/cloudbrain/cloudbrainTask/inference.go new file mode 100644 index 000000000..21b271e63 --- /dev/null +++ b/services/cloudbrain/cloudbrainTask/inference.go @@ -0,0 +1,631 @@ +package cloudbrainTask + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "net/http" + "os" + "path" + "strconv" + "strings" + "unicode/utf8" + + "code.gitea.io/gitea/modules/modelarts" + + "code.gitea.io/gitea/modules/git" + + api "code.gitea.io/gitea/modules/structs" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/cloudbrain" + "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/redis/redis_key" + "code.gitea.io/gitea/modules/redis/redis_lock" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/services/cloudbrain/resource" + "code.gitea.io/gitea/services/reward/point/account" +) + +const CLONE_FILE_PREFIX = "file:///" + +func 
CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJobOption) { + + displayJobName := option.DisplayJobName + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + image := strings.TrimSpace(option.Image) + uuid := option.Attachment + jobType := string(models.JobTypeInference) + codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath + branchName := option.BranchName + bootFile := strings.TrimSpace(option.BootFile) + labelName := option.LabelName + repo := ctx.Repo.Repository + + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName)) + defer lock.UnLock() + isOk, err := lock.Lock(models.CloudbrainKeyDuration) + if !isOk { + log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err"))) + return + } + + ckptUrl := setting.Attachment.Minio.RealPath + option.PreTrainModelUrl + option.CkptName + log.Info("ckpt url:" + ckptUrl) + command, err := getInferenceJobCommand(option) + if err != nil { + log.Error("getTrainJobCommand failed: %v", err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) + if err == nil { + if len(tasks) != 0 { + log.Error("the job name did already exist", ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("the job name did already exist")) + return + } + } else { + if !models.IsErrJobNotExist(err) { + log.Error("system error, %v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error")) + return + } + } + + if !jobNamePattern.MatchString(displayJobName) { + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_jobname_err"))) + return + } + + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + 
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_bootfile_err"))) + return + } + + count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType) + if err != nil { + log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error")) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob"))) + return + } + } + + if branchName == "" { + branchName = cloudbrain.DefaultBranchName + } + errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ResultPath) + if errStr != "" { + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr(errStr))) + return + } + + commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) + + datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.error.dataset_select"))) + return + } + spec, err := resource.GetAndCheckSpec(ctx.User.ID, option.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeInference, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Resource specification is not available")) + return + } + if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { + log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance"))) + return + } + req := cloudbrain.GenerateCloudBrainTaskReq{ + Ctx: ctx, + DisplayJobName: displayJobName, + 
JobName: jobName, + Image: image, + Command: command, + Uuids: uuid, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, + CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), + ModelPath: setting.Attachment.Minio.RealPath + option.PreTrainModelUrl, + BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), + Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), + BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), + JobType: jobType, + Description: option.Description, + BranchName: branchName, + BootFile: option.BootFile, + Params: option.Params, + CommitID: commitID, + ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + ModelName: option.ModelName, + ModelVersion: option.ModelVersion, + CkptName: option.CkptName, + TrainUrl: option.PreTrainModelUrl, + LabelName: labelName, + Spec: spec, + } + + jobId, err := cloudbrain.GenerateTask(req) + if err != nil { + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId}) +} + +func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJobOption) { + ctx.Data["PageIsTrainJob"] = true + VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) + displayJobName := option.DisplayJobName + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + uuid := option.Attachment + description := option.Description + workServerNumber := option.WorkServerNumber + engineID, _ := strconv.Atoi(option.ImageID) + bootFile := strings.TrimSpace(option.BootFile) + params := option.Params + repo := ctx.Repo.Repository + codeLocalPath := setting.JobPath + jobName + modelarts.CodePath + codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath + 
VersionOutputPath + "/" + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" + //dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" + branchName := option.BranchName + EngineName := option.Image + LabelName := option.LabelName + isLatestVersion := modelarts.IsLatestVersion + VersionCount := modelarts.VersionCountOne + trainUrl := option.PreTrainModelUrl + modelName := option.ModelName + modelVersion := option.ModelVersion + ckptName := option.CkptName + ckptUrl := "/" + option.PreTrainModelUrl + option.CkptName + + errStr := checkInferenceJobMultiNode(ctx.User.ID, option.WorkServerNumber) + if errStr != "" { + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr(errStr))) + return + } + + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeInference), displayJobName)) + isOk, err := lock.Lock(models.CloudbrainKeyDuration) + if !isOk { + log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err"))) + return + } + defer lock.UnLock() + + count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeInference)) + if err != nil { + log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error")) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting inference task", ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("you have already a running or waiting inference task, can not create more")) + return + } + } + + if err := paramCheckCreateInferenceJob(option); err != nil { + log.Error("paramCheckCreateInferenceJob failed:(%v)", err) + + ctx.JSON(http.StatusOK, 
models.BaseErrorMessageApi(err.Error())) + return + } + + bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_bootfile_err"))) + return + } + + //Determine whether the task name of the task in the project is duplicated + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeInference), displayJobName) + if err == nil { + if len(tasks) != 0 { + log.Error("the job name did already exist", ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("the job name did already exist")) + return + } + } else { + if !models.IsErrJobNotExist(err) { + log.Error("system error, %v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error")) + return + } + } + + spec, err := resource.GetAndCheckSpec(ctx.User.ID, option.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeInference, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo}) + if err != nil || spec == nil { + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Resource specification not available")) + return + } + if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { + log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance"))) + return + } + + //todo: del the codeLocalPath + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } + + gitRepo, _ := git.OpenRepository(repo.RepoPath()) + commitID, _ := gitRepo.GetBranchCommitID(branchName) + + if err := downloadCode(repo, codeLocalPath, branchName); err != nil { + log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err) + + ctx.JSON(http.StatusOK, 
models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + //todo: upload code (send to file_server todo this work?) + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Failed to obsMkdir_result")) + return + } + + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Failed to obsMkdir_log")) + return + } + + if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + var parameters models.Parameters + param := make([]models.Parameter, 0) + param = append(param, models.Parameter{ + Label: modelarts.ResultUrl, + Value: "s3:/" + resultObsPath, + }, models.Parameter{ + Label: modelarts.CkptUrl, + Value: "s3:/" + ckptUrl, + }) + + datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) + if err != nil { + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + dataPath := dataUrl + jsondatas, err := json.Marshal(datasUrlList) + if err != nil { + log.Error("Failed to Marshal: %v", err) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("json error:"+err.Error())) + return + } + if isMultiDataset { + param = append(param, models.Parameter{ + Label: modelarts.MultiDataUrl, + Value: string(jsondatas), + }) + } + + existDeviceTarget := false + if len(params) != 0 { + err := json.Unmarshal([]byte(params), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + + ctx.JSON(http.StatusOK, 
models.BaseErrorMessageApi("运行参数错误")) + return + } + + for _, parameter := range parameters.Parameter { + if parameter.Label == modelarts.DeviceTarget { + existDeviceTarget = true + } + if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { + param = append(param, models.Parameter{ + Label: parameter.Label, + Value: parameter.Value, + }) + } + } + } + if !existDeviceTarget { + param = append(param, models.Parameter{ + Label: modelarts.DeviceTarget, + Value: modelarts.Ascend, + }) + } + + req := &modelarts.GenerateInferenceJobReq{ + JobName: jobName, + DisplayJobName: displayJobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: trainUrl, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: getPoolId(), + Uuid: uuid, + Parameters: param, //modelarts train parameters + CommitID: commitID, + BranchName: branchName, + Params: option.Params, + EngineName: EngineName, + LabelName: LabelName, + IsLatestVersion: isLatestVersion, + VersionCount: VersionCount, + TotalVersionCount: modelarts.TotalVersionCount, + ModelName: modelName, + ModelVersion: modelVersion, + CkptName: ckptName, + ResultUrl: resultObsPath, + Spec: spec, + DatasetName: datasetNames, + JobType: string(models.JobTypeInference), + } + + jobId, err := modelarts.GenerateInferenceJob(ctx, req) + if err != nil { + log.Error("GenerateTrainJob failed:%v", err.Error()) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId}) +} + +func getDatasUrlListByUUIDS(uuidStr string) ([]models.Datasurl, string, string, bool, error) { + var isMultiDataset bool + var dataUrl string + var datasetNames string + var datasUrlList []models.Datasurl + uuids := strings.Split(uuidStr, ";") + if len(uuids) > setting.MaxDatasetNum { + log.Error("the dataset 
count(%d) exceed the limit", len(uuids)) + return datasUrlList, dataUrl, datasetNames, isMultiDataset, errors.New("the dataset count exceed the limit") + } + + datasetInfos := make(map[string]models.DatasetInfo) + attachs, err := models.GetAttachmentsByUUIDs(uuids) + if err != nil || len(attachs) != len(uuids) { + log.Error("GetAttachmentsByUUIDs failed: %v", err) + return datasUrlList, dataUrl, datasetNames, isMultiDataset, errors.New("GetAttachmentsByUUIDs failed") + } + + for i, tmpUuid := range uuids { + var attach *models.Attachment + for _, tmpAttach := range attachs { + if tmpAttach.UUID == tmpUuid { + attach = tmpAttach + break + } + } + if attach == nil { + log.Error("GetAttachmentsByUUIDs failed: %v", err) + return datasUrlList, dataUrl, datasetNames, isMultiDataset, errors.New("GetAttachmentsByUUIDs failed") + } + fileName := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(attach.Name, ".zip"), ".tar.gz"), ".tgz") + for _, datasetInfo := range datasetInfos { + if fileName == datasetInfo.Name { + log.Error("the dataset name is same: %v", attach.Name) + return datasUrlList, dataUrl, datasetNames, isMultiDataset, errors.New("the dataset name is same") + } + } + if len(attachs) <= 1 { + dataUrl = "/" + setting.Bucket + "/" + setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + attach.UUID + "/" + isMultiDataset = false + } else { + dataUrl = "/" + setting.Bucket + "/" + setting.BasePath + path.Join(attachs[0].UUID[0:1], attachs[0].UUID[1:2]) + "/" + attachs[0].UUID + attachs[0].UUID + "/" + datasetUrl := "s3://" + setting.Bucket + "/" + setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + attach.UUID + "/" + datasUrlList = append(datasUrlList, models.Datasurl{ + DatasetUrl: datasetUrl, + DatasetName: fileName, + }) + isMultiDataset = true + } + + if i == 0 { + datasetNames = attach.Name + } else { + datasetNames += ";" + attach.Name + } + } + + return datasUrlList, dataUrl, 
datasetNames, isMultiDataset, nil +} +func checkInferenceJobMultiNode(userId int64, serverNum int) string { + if serverNum == 1 { + return "" + } + + return "repo.modelarts.no_node_right" + +} + +func paramCheckCreateInferenceJob(option api.CreateTrainJobOption) error { + if !strings.HasSuffix(strings.TrimSpace(option.BootFile), ".py") { + log.Error("the boot file(%s) must be a python file", strings.TrimSpace(option.BootFile)) + return errors.New("启动文件必须是python文件") + } + + if option.ModelName == "" { + log.Error("the ModelName(%d) must not be nil", option.ModelName) + return errors.New("模型名称不能为空") + } + if option.ModelVersion == "" { + log.Error("the ModelVersion(%d) must not be nil", option.ModelVersion) + return errors.New("模型版本不能为空") + } + if option.CkptName == "" { + log.Error("the CkptName(%d) must not be nil", option.CkptName) + return errors.New("权重文件不能为空") + } + if option.BranchName == "" { + log.Error("the Branch(%d) must not be nil", option.BranchName) + return errors.New("分支名不能为空") + } + + if utf8.RuneCountInString(option.Description) > 255 { + log.Error("the Description length(%d) must not more than 255", option.Description) + return errors.New("描述字符不能超过255个字符") + } + + return nil +} + +func loadCodeAndMakeModelPath(repo *models.Repository, codePath string, branchName string, jobName string, resultPath string) string { + err := downloadCode(repo, codePath, branchName) + if err != nil { + return "cloudbrain.load_code_failed" + } + + err = uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/") + if err != nil { + return "cloudbrain.load_code_failed" + } + + modelPath := setting.JobPath + jobName + resultPath + "/" + err = mkModelPath(modelPath) + if err != nil { + return "cloudbrain.load_code_failed" + } + err = uploadCodeToMinio(modelPath, jobName, resultPath+"/") + if err != nil { + return "cloudbrain.load_code_failed" + } + + return "" +} + +func downloadCode(repo *models.Repository, codePath, branchName string) error { + //add 
"file:///" prefix to make the depth valid + if err := git.Clone(CLONE_FILE_PREFIX+repo.RepoPath(), codePath, git.CloneRepoOptions{Branch: branchName, Depth: 1}); err != nil { + log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err) + return err + } + + configFile, err := os.OpenFile(codePath+"/.git/config", os.O_RDWR, 0666) + if err != nil { + log.Error("open file(%s) failed:%v", codePath+"/,git/config", err) + return err + } + + defer configFile.Close() + + pos := int64(0) + reader := bufio.NewReader(configFile) + for { + line, err := reader.ReadString('\n') + if err != nil { + if err == io.EOF { + log.Error("not find the remote-url") + return nil + } else { + log.Error("read error: %v", err) + return err + } + } + + if strings.Contains(line, "url") && strings.Contains(line, ".git") { + originUrl := "\turl = " + repo.CloneLink().HTTPS + "\n" + if len(line) > len(originUrl) { + originUrl += strings.Repeat(" ", len(line)-len(originUrl)) + } + bytes := []byte(originUrl) + _, err := configFile.WriteAt(bytes, pos) + if err != nil { + log.Error("WriteAt failed:%v", err) + return err + } + break + } + + pos += int64(len(line)) + } + + return nil +} + +func getInferenceJobCommand(option api.CreateTrainJobOption) (string, error) { + var command string + bootFile := strings.TrimSpace(option.BootFile) + params := option.Params + + if !strings.HasSuffix(bootFile, ".py") { + log.Error("bootFile(%s) format error", bootFile) + return command, errors.New("bootFile format error") + } + + var parameters models.Parameters + var param string + if len(params) != 0 { + err := json.Unmarshal([]byte(params), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + return command, err + } + + for _, parameter := range parameters.Parameter { + param += " --" + parameter.Label + "=" + parameter.Value + } + } + + param += " --modelname" + "=" + option.CkptName + + command += "python /code/" + bootFile + param + " > " + cloudbrain.ResultPath 
+ "/" + option.DisplayJobName + "-" + cloudbrain.LogFile + + return command, nil +} diff --git a/services/cloudbrain/cloudbrainTask/train.go b/services/cloudbrain/cloudbrainTask/train.go new file mode 100644 index 000000000..8e4673d66 --- /dev/null +++ b/services/cloudbrain/cloudbrainTask/train.go @@ -0,0 +1,1210 @@ +package cloudbrainTask + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "net/http" + "os" + "path" + "regexp" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/urfs_client/urchin" + + "code.gitea.io/gitea/modules/timeutil" + + "code.gitea.io/gitea/modules/notification" + + "code.gitea.io/gitea/modules/obs" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/storage" + "github.com/unknwon/com" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/cloudbrain" + "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/grampus" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/modelarts" + "code.gitea.io/gitea/modules/redis/redis_key" + "code.gitea.io/gitea/modules/redis/redis_lock" + "code.gitea.io/gitea/modules/setting" + api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/services/cloudbrain/resource" + "code.gitea.io/gitea/services/reward/point/account" +) + +var jobNamePattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-_]{1,34}[a-z0-9-]$`) + +const TaskTypeCloudbrainOne = 0 +const TaskTypeModelArts = 1 +const TaskTypeGrampusGPU = 2 +const TaskTypeGrampusNPU = 3 + +func CloudbrainOneTrainJobCreate(ctx *context.Context, option api.CreateTrainJobOption) { + + displayJobName := option.DisplayJobName + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + image := strings.TrimSpace(option.Image) + uuids := option.Attachment + jobType := string(models.JobTypeTrain) + + codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath + branchName := option.BranchName + repo := ctx.Repo.Repository + + lock := 
redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName)) + defer lock.UnLock() + spec, datasetInfos, datasetNames, err := checkParameters(ctx, option, lock, repo) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + + command, err := getTrainJobCommand(option) + if err != nil { + log.Error("getTrainJobCommand failed: %v", err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + + errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath) + if errStr != "" { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr(errStr))) + return + } + + commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) + + req := cloudbrain.GenerateCloudBrainTaskReq{ + Ctx: ctx, + DisplayJobName: displayJobName, + JobName: jobName, + Image: image, + Command: command, + Uuids: uuids, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, + CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), + ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), + BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), + Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), + BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), + JobType: jobType, + Description: option.Description, + BranchName: branchName, + BootFile: option.BootFile, + Params: option.Params, + CommitID: commitID, + BenchmarkTypeID: 0, + BenchmarkChildTypeID: 0, + ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, + } + + if option.ModelName != "" { //使用预训练模型训练 + req.ModelName = option.ModelName + req.LabelName = option.LabelName + req.CkptName = option.CkptName + req.ModelVersion = option.ModelVersion + req.PreTrainModelPath = setting.Attachment.Minio.RealPath + option.PreTrainModelUrl + 
req.PreTrainModelUrl = option.PreTrainModelUrl + + } + + jobId, err := cloudbrain.GenerateTask(req) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + ctx.JSON(http.StatusOK, models.BaseMessageApi{ + Code: 0, + Message: jobId, + }) +} +func ModelArtsTrainJobNpuCreate(ctx *context.Context, option api.CreateTrainJobOption) { + VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) + displayJobName := option.DisplayJobName + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + uuid := option.Attachment + description := option.Description + workServerNumber := option.WorkServerNumber + engineID, _ := strconv.Atoi(option.ImageID) + bootFile := strings.TrimSpace(option.BootFile) + params := option.Params + repo := ctx.Repo.Repository + codeLocalPath := setting.JobPath + jobName + modelarts.CodePath + codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" + branchName := option.BranchName + isLatestVersion := modelarts.IsLatestVersion + VersionCount := modelarts.VersionCountOne + EngineName := option.Image + + errStr := checkMultiNode(ctx.User.ID, option.WorkServerNumber) + if errStr != "" { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr(errStr))) + return + } + + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeTrain), displayJobName)) + defer lock.UnLock() + + spec, _, _, err := checkParameters(ctx, option, lock, repo) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + + //todo: del the codeLocalPath + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + 
os.RemoveAll(codeLocalPath) + } + + gitRepo, _ := git.OpenRepository(repo.RepoPath()) + commitID, _ := gitRepo.GetBranchCommitID(branchName) + + if err := downloadCode(repo, codeLocalPath, branchName); err != nil { + log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + //todo: upload code (send to file_server todo this work?) + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Failed to obsMkdir_output")) + return + } + + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Failed to obsMkdir_log")) + return + } + + parentDir := VersionOutputPath + "/" + if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { + // if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { + log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + var parameters models.Parameters + param := make([]models.Parameter, 0) + existDeviceTarget := false + if len(params) != 0 { + err := json.Unmarshal([]byte(params), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("运行参数错误")) + return + } + + for _, parameter := range parameters.Parameter { + if parameter.Label == modelarts.DeviceTarget { + existDeviceTarget = true + } + if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { + param = append(param, 
models.Parameter{ + Label: parameter.Label, + Value: parameter.Value, + }) + } + } + } + if !existDeviceTarget { + param = append(param, models.Parameter{ + Label: modelarts.DeviceTarget, + Value: modelarts.Ascend, + }) + } + datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) + if err != nil { + log.Error("Failed to getDatasUrlListByUUIDS: %v", err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Failed to getDatasUrlListByUUIDS:"+err.Error())) + return + } + dataPath := dataUrl + jsondatas, err := json.Marshal(datasUrlList) + if err != nil { + log.Error("Failed to Marshal: %v", err) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("json error:"+err.Error())) + return + } + if isMultiDataset { + param = append(param, models.Parameter{ + Label: modelarts.MultiDataUrl, + Value: string(jsondatas), + }) + } + if option.ModelName != "" { //使用预训练模型训练 + ckptUrl := "/" + option.PreTrainModelUrl + option.CkptName + param = append(param, models.Parameter{ + Label: modelarts.CkptUrl, + Value: "s3:/" + ckptUrl, + }) + } + + req := &modelarts.GenerateTrainJobReq{ + JobName: jobName, + DisplayJobName: displayJobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: outputObsPath, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: getPoolId(), + Uuid: uuid, + Parameters: param, + CommitID: commitID, + IsLatestVersion: isLatestVersion, + BranchName: branchName, + Params: option.Params, + EngineName: EngineName, + VersionCount: VersionCount, + TotalVersionCount: modelarts.TotalVersionCount, + DatasetName: datasetNames, + Spec: spec, + } + if option.ModelName != "" { //使用预训练模型训练 + req.ModelName = option.ModelName + req.LabelName = option.LabelName + req.CkptName = option.CkptName + req.ModelVersion = option.ModelVersion + req.PreTrainModelUrl = option.PreTrainModelUrl + + } + + 
userCommand, userImageUrl := getUserCommand(engineID, req) + req.UserCommand = userCommand + req.UserImageUrl = userImageUrl + + //将params转换Parameters.Parameter,出错时返回给前端 + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(params), &Parameters); err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("json.Unmarshal failed:"+err.Error())) + return + } + + jobId, err := modelarts.GenerateTrainJob(ctx, req) + if err != nil { + log.Error("GenerateTrainJob failed:%v", err.Error()) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + ctx.JSON(http.StatusOK, models.BaseMessageApi{ + Code: 0, + Message: jobId, + }) + +} + +func GrampusTrainJobGpuCreate(ctx *context.Context, option api.CreateTrainJobOption) { + + displayJobName := option.DisplayJobName + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + uuid := option.Attachment + description := option.Description + bootFile := strings.TrimSpace(option.BootFile) + params := option.Params + repo := ctx.Repo.Repository + codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" + codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" + branchName := option.BranchName + image := strings.TrimSpace(option.Image) + + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeTrain), displayJobName)) + defer lock.UnLock() + spec, datasetInfos, datasetNames, err := checkParameters(ctx, option, lock, repo) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + + //prepare code and out path + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } + + if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { + log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, 
models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + + } + + //todo: upload code (send to file_server todo this work?) + //upload code + if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { + log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/" + if err := mkModelPath(modelPath); err != nil { + log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + //init model readme + if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { + log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + var datasetRemotePath, allFileName string + for _, datasetInfo := range datasetInfos { + if datasetRemotePath == "" { + datasetRemotePath = datasetInfo.DataLocalPath + allFileName = datasetInfo.FullName + } else { + datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + allFileName = allFileName + ";" + datasetInfo.FullName + } + + } + + //prepare command + preTrainModelPath := getPreTrainModelPath(option.PreTrainModelUrl, option.CkptName) + + command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, option.CkptName, "") + if err != nil { + log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, 
models.BaseErrorMessageApi("Create task failed, internal error")) + return + } + + commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) + + req := &grampus.GenerateTrainJobReq{ + JobName: jobName, + DisplayJobName: displayJobName, + ComputeResource: models.GPUResource, + ProcessType: grampus.ProcessorTypeGPU, + Command: command, + ImageUrl: image, + Description: description, + BootFile: bootFile, + Uuid: uuid, + CommitID: commitID, + BranchName: branchName, + Params: option.Params, + EngineName: image, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, + + IsLatestVersion: modelarts.IsLatestVersion, + VersionCount: modelarts.VersionCountOne, + WorkServerNumber: 1, + Spec: spec, + } + + if option.ModelName != "" { //使用预训练模型训练 + req.ModelName = option.ModelName + req.LabelName = option.LabelName + req.CkptName = option.CkptName + req.ModelVersion = option.ModelVersion + req.PreTrainModelUrl = option.PreTrainModelUrl + + } + + jobId, err := grampus.GenerateTrainJob(ctx, req) + if err != nil { + log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId}) +} + +func checkParameters(ctx *context.Context, option api.CreateTrainJobOption, lock *redis_lock.DistributeLock, repo *models.Repository) (*models.Specification, map[string]models.DatasetInfo, string, error) { + isOk, err := lock.Lock(models.CloudbrainKeyDuration) + if !isOk { + log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + + return nil, nil, "", fmt.Errorf(ctx.Tr("repo.cloudbrain_samejob_err")) + } + + if !jobNamePattern.MatchString(option.DisplayJobName) { + return nil, nil, "", fmt.Errorf(ctx.Tr("repo.cloudbrain_jobname_err")) + } + + bootFileExist, err := ctx.Repo.FileExists(option.BootFile, option.BranchName) + if err != nil || !bootFileExist { + log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) + return 
nil, nil, "", fmt.Errorf(ctx.Tr("repo.cloudbrain_bootfile_err")) + } + + computeResource := models.GPUResource + if isNpuTask(option) { + computeResource = models.NPUResource + } + + //check count limit + taskType := option.Type + if isC2NetTask(option) { + taskType = 2 + } + + count, err := GetNotFinalStatusTaskCount(ctx.User.ID, taskType, string(models.JobTypeTrain), computeResource) + if err != nil { + log.Error("GetCountByUserID failed:%v", err, ctx.Data["MsgID"]) + return nil, nil, "", fmt.Errorf("system error") + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + return nil, nil, "", fmt.Errorf("you have already a running or waiting task, can not create more.") + } + } + + //check param + if err := paramCheckCreateTrainJob(option.BootFile, option.BranchName); err != nil { + log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"]) + return nil, nil, "", err + } + + //check whether the task name in the project is duplicated + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeTrain), option.DisplayJobName) + if err == nil { + if len(tasks) != 0 { + log.Error("the job name did already exist", ctx.Data["MsgID"]) + return nil, nil, "", fmt.Errorf("The job name did already exist.") + } + } else { + if !models.IsErrJobNotExist(err) { + log.Error("system error, %v", err, ctx.Data["MsgID"]) + return nil, nil, "", fmt.Errorf("system error") + } + } + + //check specification + computeType := models.GPU + + if isNpuTask(option) { + computeType = models.NPU + } + cluster := models.OpenICluster + if isC2NetTask(option) { + cluster = models.C2NetCluster + } + aiCenterCode := "" + if option.Type == TaskTypeCloudbrainOne { + aiCenterCode = models.AICenterOfCloudBrainOne + } else if option.Type == TaskTypeModelArts { + aiCenterCode = models.AICenterOfCloudBrainTwo + } + + spec, err := resource.GetAndCheckSpec(ctx.User.ID, option.SpecId, models.FindSpecsOptions{ + JobType: 
models.JobTypeTrain, + ComputeResource: computeType, + Cluster: cluster, + AiCenterCode: aiCenterCode, + }) + if err != nil || spec == nil { + return nil, nil, "", fmt.Errorf("Resource specification is not available.") + } + + if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { + log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) + return nil, nil, "", fmt.Errorf(ctx.Tr("points.insufficient_points_balance")) + } + + //check dataset + var datasetInfos map[string]models.DatasetInfo + var datasetNames string + if option.Type != TaskTypeModelArts { + if isC2NetTask(option) { + datasetInfos, datasetNames, err = models.GetDatasetInfo(option.Attachment, computeType) + } else { + datasetInfos, datasetNames, err = models.GetDatasetInfo(option.Attachment) + } + + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil, nil, "", fmt.Errorf(ctx.Tr("cloudbrain.error.dataset_select")) + } + } + return spec, datasetInfos, datasetNames, err +} + +func isNpuTask(option api.CreateTrainJobOption) bool { + return option.Type == TaskTypeModelArts || option.Type == TaskTypeGrampusNPU +} + +func isC2NetTask(option api.CreateTrainJobOption) bool { + return option.Type == TaskTypeGrampusGPU || option.Type == TaskTypeGrampusNPU +} + +func GrampusTrainJobNpuCreate(ctx *context.Context, option api.CreateTrainJobOption) { + + displayJobName := option.DisplayJobName + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + uuid := option.Attachment + description := option.Description + bootFile := strings.TrimSpace(option.BootFile) + params := option.Params + repo := ctx.Repo.Repository + codeLocalPath := setting.JobPath + jobName + modelarts.CodePath + codeObsPath := grampus.JobPath + jobName + modelarts.CodePath + branchName := option.BranchName + isLatestVersion := modelarts.IsLatestVersion + versionCount := modelarts.VersionCountOne + engineName := option.Image + + lock := 
redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeTrain), displayJobName)) + defer lock.UnLock() + spec, datasetInfos, datasetNames, err := checkParameters(ctx, option, lock, repo) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + + //prepare code and out path + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } + + if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { + log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + //todo: upload code (send to file_server todo this work?) + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { + log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + var datasetRemotePath, allFileName string + for _, datasetInfo := range datasetInfos { + if datasetRemotePath == "" { + datasetRemotePath = datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" + allFileName = datasetInfo.FullName + } else { + datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" + allFileName = allFileName + ";" + datasetInfo.FullName + } + + } + + //prepare command + preTrainModelPath := getPreTrainModelPath(option.PreTrainModelUrl, option.CkptName) + command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, 
bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, option.CkptName, grampus.GetNpuModelRemoteObsUrl(jobName)) + if err != nil { + log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Create task failed, internal error")) + return + } + + commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) + + req := &grampus.GenerateTrainJobReq{ + JobName: jobName, + DisplayJobName: displayJobName, + ComputeResource: models.NPUResource, + ProcessType: grampus.ProcessorTypeNPU, + Command: command, + ImageId: option.ImageID, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + WorkServerNumber: option.WorkServerNumber, + Uuid: uuid, + CommitID: commitID, + IsLatestVersion: isLatestVersion, + BranchName: branchName, + Params: option.Params, + EngineName: engineName, + VersionCount: versionCount, + TotalVersionCount: modelarts.TotalVersionCount, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, + Spec: spec, + CodeName: strings.ToLower(repo.Name), + } + if option.ModelName != "" { //使用预训练模型训练 + req.ModelName = option.ModelName + req.LabelName = option.LabelName + req.CkptName = option.CkptName + req.ModelVersion = option.ModelVersion + req.PreTrainModelUrl = option.PreTrainModelUrl + req.PreTrainModelPath = preTrainModelPath + } + + jobId, err := grampus.GenerateTrainJob(ctx, req) + if err != nil { + log.Error("GenerateTrainJob failed:%v", err.Error()) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId}) +} + +func obsMkdir(dir string) error { + input := &obs.PutObjectInput{} + input.Bucket = setting.Bucket + input.Key = dir + _, err := storage.ObsCli.PutObject(input) + if err != nil { + log.Error("PutObject(%s) failed: %s", input.Key, err.Error()) + return 
err + } + + return nil +} +func uploadCodeToObs(codePath, jobName, parentDir string) error { + files, err := readDir(codePath) + if err != nil { + log.Error("readDir(%s) failed: %s", codePath, err.Error()) + return err + } + + for _, file := range files { + if file.IsDir() { + input := &obs.PutObjectInput{} + input.Bucket = setting.Bucket + input.Key = parentDir + file.Name() + "/" + _, err = storage.ObsCli.PutObject(input) + if err != nil { + log.Error("PutObject(%s) failed: %s", input.Key, err.Error()) + return err + } + + if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil { + log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error()) + return err + } + } else { + input := &obs.PutFileInput{} + input.Bucket = setting.Bucket + input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name() + input.SourceFile = codePath + file.Name() + _, err = storage.ObsCli.PutFile(input) + if err != nil { + log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error()) + return err + } + } + } + + return nil +} + +func paramCheckCreateTrainJob(bootFile string, branchName string) error { + if !strings.HasSuffix(strings.TrimSpace(bootFile), ".py") { + log.Error("the boot file(%s) must be a python file", bootFile) + return errors.New("启动文件必须是python文件") + } + + if branchName == "" { + log.Error("the branch must not be null!", branchName) + return errors.New("代码分支不能为空!") + } + + return nil +} +func downloadZipCode(ctx *context.Context, codePath, branchName string) error { + archiveType := git.ZIP + archivePath := codePath + + if !com.IsDir(archivePath) { + if err := os.MkdirAll(archivePath, os.ModePerm); err != nil { + log.Error("MkdirAll failed:" + err.Error()) + return err + } + } + + // Get corresponding commit. 
+ var ( + commit *git.Commit + err error + ) + + gitRepo := ctx.Repo.GitRepo + if err != nil { + log.Error("OpenRepository failed:" + err.Error()) + return err + } + + if gitRepo.IsBranchExist(branchName) { + commit, err = gitRepo.GetBranchCommit(branchName) + if err != nil { + log.Error("GetBranchCommit failed:" + err.Error()) + return err + } + } else { + log.Error("the branch is not exist: " + branchName) + return fmt.Errorf("The branch does not exist.") + } + + archivePath = path.Join(archivePath, grampus.CodeArchiveName) + if !com.IsFile(archivePath) { + if err := commit.CreateArchive(archivePath, git.CreateArchiveOpts{ + Format: archiveType, + Prefix: setting.Repository.PrefixArchiveFiles, + }); err != nil { + log.Error("CreateArchive failed:" + err.Error()) + return err + } + } + + return nil +} + +func uploadCodeToMinio(codePath, jobName, parentDir string) error { + files, err := readDir(codePath) + if err != nil { + log.Error("readDir(%s) failed: %s", codePath, err.Error()) + return err + } + + for _, file := range files { + if file.IsDir() { + if err = uploadCodeToMinio(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil { + log.Error("uploadCodeToMinio(%s) failed: %s", file.Name(), err.Error()) + return err + } + } else { + destObject := setting.CBCodePathPrefix + jobName + parentDir + file.Name() + sourceFile := codePath + file.Name() + err = storage.Attachments.UploadObject(destObject, sourceFile) + if err != nil { + log.Error("UploadObject(%s) failed: %s", file.Name(), err.Error()) + return err + } + } + } + + return nil +} + +func readDir(dirname string) ([]os.FileInfo, error) { + f, err := os.Open(dirname) + if err != nil { + return nil, err + } + + list, err := f.Readdir(0) + f.Close() + if err != nil { + //todo: can not upload empty folder + if err == io.EOF { + return nil, nil + } + return nil, err + } + + //sort.Slice(list, func(i, j int) bool { return list[i].Name() < list[j].Name() }) + return list, nil +} +func 
mkModelPath(modelPath string) error { + return mkPathAndReadMeFile(modelPath, "You can put the files into this directory and download the files by the web page.") +} + +func mkPathAndReadMeFile(path string, text string) error { + err := os.MkdirAll(path, os.ModePerm) + if err != nil { + log.Error("MkdirAll(%s) failed:%v", path, err) + return err + } + + fileName := path + "README" + f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) + if err != nil { + log.Error("OpenFile failed", err.Error()) + return err + } + + defer f.Close() + + _, err = f.WriteString(text) + if err != nil { + log.Error("WriteString failed", err.Error()) + return err + } + + return nil +} + +func getPreTrainModelPath(pretrainModelDir string, fileName string) string { + index := strings.Index(pretrainModelDir, "/") + if index > 0 { + filterBucket := pretrainModelDir[index+1:] + return filterBucket + fileName + } else { + return "" + } + +} + +func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName, modelRemoteObsUrl string) (string, error) { + var command string + + //prepare + workDir := grampus.NpuWorkDir + if processorType == grampus.ProcessorTypeNPU { + command += "pwd;cd " + workDir + grampus.CommandPrepareScriptNpu + } else if processorType == grampus.ProcessorTypeGPU { + workDir = grampus.GpuWorkDir + command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScriptGpu, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject) + } + + //download code & dataset + if processorType == grampus.ProcessorTypeNPU { + //no need to download code & dataset by internet + } else if processorType == grampus.ProcessorTypeGPU { + commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " '" + dataRemotePath + "' '" + datasetName + "'" + commandDownload = 
processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) + command += commandDownload + } + + //unzip code & dataset + if processorType == grampus.ProcessorTypeNPU { + //no need to process + } else if processorType == grampus.ProcessorTypeGPU { + unZipDatasetCommand := GenerateDatasetUnzipCommand(datasetName) + commandUnzip := "cd " + workDir + "code;unzip -q master.zip;rm -f master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand + command += commandUnzip + } + + command += "echo \"unzip finished;start to exec code;\";" + + // set export + var commandExport string + if processorType == grampus.ProcessorTypeNPU { + commandExport = "export bucket=" + setting.Bucket + " && export remote_path=" + outputRemotePath + ";" + } else if processorType == grampus.ProcessorTypeGPU { + commandExport = "export env=" + setting.Grampus.Env + " && export remote_path=" + outputRemotePath + ";" + } + + command += commandExport + + //exec code + var parameters models.Parameters + var paramCode string + + if len(paramSrc) != 0 { + err := json.Unmarshal([]byte(paramSrc), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", paramSrc, err) + return command, err + } + + for _, parameter := range parameters.Parameter { + paramCode += " --" + parameter.Label + "=" + parameter.Value + } + } + + var commandCode string + if processorType == grampus.ProcessorTypeNPU { + paramCode += " --model_url=" + modelRemoteObsUrl + commandCode = "/bin/bash /home/work/run_train_for_openi.sh /home/work/openi.py " + grampus.NpuLocalLogUrl + paramCode + ";" + } else if processorType == grampus.ProcessorTypeGPU { + if pretrainModelFileName != "" { + paramCode += " --ckpt_url" + "=" + workDir + "pretrainmodel/" + pretrainModelFileName + } + commandCode = "cd " + workDir + "code/" + strings.ToLower(repoName) + ";python " + bootFile + paramCode + ";" + } + + command += commandCode + + //get exec result + commandGetRes := 
"result=$?;" + command += commandGetRes + + //upload models + if processorType == grampus.ProcessorTypeNPU { + // no need to upload + } else if processorType == grampus.ProcessorTypeGPU { + commandUpload := "cd " + workDir + setting.Grampus.SyncScriptProject + "/;./uploader_for_gpu " + setting.Grampus.Env + " " + outputRemotePath + " " + workDir + "output/;" + command += commandUpload + } + + //check exec result + commandCheckRes := "bash -c \"[[ $result -eq 0 ]] && exit 0 || exit -1\"" + command += commandCheckRes + + return command, nil +} +func processPretrainModelParameter(pretrainModelPath string, pretrainModelFileName string, commandDownload string) string { + commandDownloadTemp := commandDownload + if pretrainModelPath != "" { + commandDownloadTemp += " '" + pretrainModelPath + "' '" + pretrainModelFileName + "'" + } + commandDownloadTemp += ";" + return commandDownloadTemp +} + +func GenerateDatasetUnzipCommand(datasetName string) string { + var unZipDatasetCommand string + + datasetNameArray := strings.Split(datasetName, ";") + if len(datasetNameArray) == 1 { //单数据集 + unZipDatasetCommand = "unzip -q '" + datasetName + "';" + if strings.HasSuffix(datasetNameArray[0], ".tar.gz") { + unZipDatasetCommand = "tar --strip-components=1 -zxvf '" + datasetName + "';" + } + unZipDatasetCommand += "rm -f '" + datasetName + "';" + + } else { //多数据集 + for _, datasetNameTemp := range datasetNameArray { + if strings.HasSuffix(datasetNameTemp, ".tar.gz") { + unZipDatasetCommand = unZipDatasetCommand + "tar -zxvf '" + datasetNameTemp + "';" + } else { + unZipDatasetCommand = unZipDatasetCommand + "unzip -q '" + datasetNameTemp + "' -d './" + strings.TrimSuffix(datasetNameTemp, ".zip") + "';" + } + unZipDatasetCommand += "rm -f '" + datasetNameTemp + "';" + } + + } + return unZipDatasetCommand +} + +func getPoolId() string { + var resourcePools modelarts.ResourcePool + json.Unmarshal([]byte(setting.ResourcePools), &resourcePools) + + return resourcePools.Info[0].ID +} + 
+func PrepareSpec4Show(task *models.Cloudbrain) { + s, err := resource.GetCloudbrainSpec(task.ID) + if err != nil { + log.Info("error:" + err.Error()) + return + } + task.Spec = s +} + +func IsTaskNotStop(task *models.Cloudbrain) bool { + statuses := CloudbrainOneNotFinalStatuses + if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter { + statuses = CloudbrainTwoNotFinalStatuses + } else { + statuses = GrampusNotFinalStatuses + } + + for _, status := range statuses { + if task.Status == status { + return true + } + } + return false + +} + +func SyncTaskStatus(task *models.Cloudbrain) error { + if task.Type == models.TypeCloudBrainOne { + result, err := cloudbrain.GetJob(task.JobID) + if err != nil { + log.Info("error:" + err.Error()) + return fmt.Errorf("repo.cloudbrain_query_fail") + } + + if result != nil { + jobRes, _ := models.ConvertToJobResultPayload(result.Payload) + taskRoles := jobRes.TaskRoles + taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) + + oldStatus := task.Status + task.Status = taskRes.TaskStatuses[0].State + + task.ContainerID = taskRes.TaskStatuses[0].ContainerID + models.ParseAndSetDurationFromCloudBrainOne(jobRes, task) + + if task.DeletedAt.IsZero() { //normal record + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } + err = models.UpdateJob(task) + if err != nil { + return fmt.Errorf("repo.cloudbrain_query_fail") + + } + } + + } else { + log.Info("error:" + err.Error()) + return fmt.Errorf("repo.cloudbrain_query_fail") + } + } else if task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter { + err := modelarts.HandleTrainJobInfo(task) + if err != nil { + return fmt.Errorf("repo.cloudbrain_query_fail") + } + + } else if task.Type == models.TypeC2Net { + result, err := grampus.GetJob(task.JobID) + if err != nil { + log.Error("GetJob failed:" + err.Error()) + return fmt.Errorf("repo.cloudbrain_query_fail") + 
} + + if result != nil { + if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { + task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] + } + oldStatus := task.Status + task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) + + if task.Status != oldStatus || task.Status == models.GrampusStatusRunning { + task.Duration = result.JobInfo.RunSec + if task.Duration < 0 { + task.Duration = 0 + } + task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) + + if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { + task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) + } + if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { + task.EndTime = task.StartTime.Add(task.Duration) + } + task.CorrectCreateUnix() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + if models.IsTrainJobTerminal(task.Status) && task.ComputeResource == models.NPUResource { + if len(result.JobInfo.Tasks[0].CenterID) == 1 { + urchin.GetBackNpuModel(task.ID, grampus.GetRemoteEndPoint(result.JobInfo.Tasks[0].CenterID[0]), grampus.BucketRemote, grampus.GetNpuModelObjectKey(task.JobName), grampus.GetCenterProxy(setting.Grampus.LocalCenterID)) + } + } + } + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob failed:" + err.Error()) + return fmt.Errorf("repo.cloudbrain_query_fail") + } + } + } + } + return nil + +} + +func getTrainJobCommand(option api.CreateTrainJobOption) (string, error) { + var command string + bootFile := strings.TrimSpace(option.BootFile) + params := option.Params + + if !strings.HasSuffix(bootFile, ".py") { + log.Error("bootFile(%s) format error", bootFile) + return command, errors.New("bootFile format error") + } + + var parameters models.Parameters + var param string + if len(params) != 0 { + err := json.Unmarshal([]byte(params), ¶meters) + if err != nil { + log.Error("Failed 
to Unmarshal params: %s (%v)", params, err) + return command, err + } + + for _, parameter := range parameters.Parameter { + param += " --" + parameter.Label + "=" + parameter.Value + } + } + if option.CkptName != "" { + param += " --ckpt_url" + "=" + "/pretrainmodel/" + option.CkptName + } + + command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + option.DisplayJobName + "-" + cloudbrain.LogFile + + return command, nil +} + +func checkMultiNode(userId int64, serverNum int) string { + if serverNum == 1 { + return "" + } + modelarts.InitMultiNode() + var isServerNumValid = false + if modelarts.MultiNodeConfig != nil { + for _, info := range modelarts.MultiNodeConfig.Info { + if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg { + if isInNodes(info.Node, serverNum) { + isServerNumValid = true + break + } + + } + } + } + if isServerNumValid { + return "" + } else { + return "repo.modelarts.no_node_right" + } +} + +func isInNodes(nodes []int, num int) bool { + for _, node := range nodes { + if node == num { + return true + } + } + return false + +} + +func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, string) { + userImageUrl := "" + userCommand := "" + if engineId < 0 { + tmpCodeObsPath := strings.Trim(req.CodeObsPath, "/") + tmpCodeObsPaths := strings.Split(tmpCodeObsPath, "/") + lastCodeDir := "code" + if len(tmpCodeObsPaths) > 0 { + lastCodeDir = tmpCodeObsPaths[len(tmpCodeObsPaths)-1] + } + userCommand = "/bin/bash /home/work/run_train.sh 's3://" + req.CodeObsPath + "' '" + lastCodeDir + "/" + req.BootFile + "' '/tmp/log/train.log' --'data_url'='s3://" + req.DataUrl + "' --'train_url'='s3://" + req.TrainUrl + "'" + var versionInfos modelarts.VersionInfo + if err := json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { + log.Info("json parse err." 
+ err.Error()) + } else { + for _, engine := range versionInfos.Version { + if engine.ID == engineId { + userImageUrl = engine.Url + break + } + } + } + for _, param := range req.Parameters { + userCommand += " --'" + param.Label + "'='" + param.Value + "'" + } + return userCommand, userImageUrl + } + return userCommand, userImageUrl +} diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go index b68abbb88..93abb2923 100644 --- a/services/cloudbrain/resource/resource_specification.go +++ b/services/cloudbrain/resource/resource_specification.go @@ -1,20 +1,23 @@ package resource import ( + "encoding/json" + "errors" + "fmt" + "strconv" + "strings" + "time" + "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/cloudbrain" + "code.gitea.io/gitea/modules/convert" "code.gitea.io/gitea/modules/grampus" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/setting" + api "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/routers/response" "code.gitea.io/gitea/services/admin/operate_log" - "encoding/json" - "errors" - "fmt" - "strconv" - "strings" - "time" ) func AddResourceSpecification(doerId int64, req models.ResourceSpecificationReq) error { @@ -210,6 +213,18 @@ func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.S return specs, err } +func FindAvailableSpecs4Show(userId int64, opts models.FindSpecsOptions) ([]*api.SpecificationShow, error) { + specs, err := FindAvailableSpecs(userId, opts) + if err != nil { + return nil, err + } + result := make([]*api.SpecificationShow, len(specs)) + for i, v := range specs { + result[i] = convert.ToSpecification(v) + } + return result, nil +} + func filterExclusiveSpecs(r []*models.Specification, userId int64) []*models.Specification { specs := make([]*models.Specification, 0, len(r)) specMap := make(map[int64]string, 0) diff --git 
a/templates/repo/cloudbrain/inference/new.tmpl b/templates/repo/cloudbrain/inference/new.tmpl index df737f995..f2b389393 100644 --- a/templates/repo/cloudbrain/inference/new.tmpl +++ b/templates/repo/cloudbrain/inference/new.tmpl @@ -331,9 +331,7 @@ $('#model_name_version').empty() let html = '' nameMap[value].forEach(element => { - let {TrainTaskInfo} = element - TrainTaskInfo = JSON.parse(TrainTaskInfo) - html += `
${element.Version}
` + html += `
${element.version}
` }); $('#model_name_version').append(html) $("#select_model_version").removeClass("loading") @@ -387,7 +385,7 @@ } function loadCheckpointList(value){ return new Promise((resolve,reject)=>{ - $.get(`${RepoLink}/modelmanage/query_modelfile_for_predict`,{ID:value}, (data) => { + $.get(`${RepoLink}/modelmanage/query_modelfile_for_predict`,{id:value}, (data) => { resolve(data) }) }) diff --git a/templates/repo/cloudbrain/trainjob/show.tmpl b/templates/repo/cloudbrain/trainjob/show.tmpl index 55bba11a4..7bc3f2c82 100644 --- a/templates/repo/cloudbrain/trainjob/show.tmpl +++ b/templates/repo/cloudbrain/trainjob/show.tmpl @@ -633,24 +633,24 @@
- - + +
-
- +
@@ -364,9 +364,9 @@ $("#task_name").removeClass("error") } - data['desc']= $('#Description').val() - data['modelId'] = $('#ModelVersion').val() - data['SrcEngine'] = $('#SrcEngine').val(); + data['desc']= $('#description').val() + data['modelId'] = $('#modelVersion').val() + data['srcEngine'] = $('#srcEngine').val(); data['inputshape']= $('#inputshape').val(); if(inputshapeNotValid(data['inputshape'])){ @@ -379,10 +379,10 @@ } data['inputdataformat']= $('#inputdataformat').val(); - data['DestFormat'] = $('#DestFormat').val(); - data['NetOutputFormat']= $('#NetOutputFormat').val(); - data['ModelFile'] = $('#ModelFile').val(); - if(data['ModelFile']==""){ + data['destFormat'] = $('#destFormat').val(); + data['netOutputFormat']= $('#netOutputFormat').val(); + data['modelFile'] = $('#modelFile').val(); + if(data['modelFile']==""){ $('.ui.error.message').text("{{.i18n.Tr "repo.modelconvert.modelfileempty"}}") $('.ui.error.message').css('display','block') $("#ModelFile_Div").addClass("error") @@ -392,11 +392,11 @@ } $.post(`${repolink}/modelmanage/create_model_convert`,data,(result) => { console.log("result=" + result); - if(result.result_code ==0){ + if(result.code ==0){ $('.ui.modal.second').modal('hide'); window.location.reload(); }else{ - $('.ui.error.message').text(result.message) + $('.ui.error.message').text(result.msg) $('.ui.error.message').css('display','block') } }) @@ -456,7 +456,7 @@ $('#choice_version').dropdown({ onChange:function(value){ console.log("model version:" + value); - $('#choice_version input[name="ModelVersion"]').val(value) + $('#choice_version input[name="modelVersion"]').val(value) loadModelFile(value); } }) @@ -464,26 +464,26 @@ $('#choice_file').dropdown({ onChange:function(value){ console.log("model file:" + value); - $('#choice_file input[name="ModelFile"]').val(value) + $('#choice_file input[name="modelFile"]').val(value) } }) }) function srcEngineChanged(){ - var ele = window.document.getElementById("SrcEngine"); + var ele = 
window.document.getElementById("srcEngine"); var index=ele.selectedIndex; var options=ele.options; var option = options[index]; - console.log("SrcEngine value=" + option); + console.log("srcEngine value=" + option); let destFormatHtml = ""; let netOutputFormatHtml = ""; if(option==null || option =="undefined" || option.value == 0){ destFormatHtml += "" netOutputFormatHtml += ""; } - $('#DestFormat').html(destFormatHtml); - $('#NetOutputFormat').html(netOutputFormatHtml); + $('#destFormat').html(destFormatHtml); + $('#netOutputFormat').html(netOutputFormatHtml); } function loadModelList(){ @@ -509,7 +509,7 @@ if(modelId ==null || modelId ==""){ console.log("modelId is null"); }else{ - $.get(`${repolink}/modelmanage/query_modelfile_for_predict?ID=${modelId}`, (data) => { + $.get(`${repolink}/modelmanage/query_modelfile_for_predict?id=${modelId}`, (data) => { const n_length = data.length let file_html='' let firstFileName ='' @@ -526,7 +526,7 @@ } $("#model-file").append(file_html) $('#choice_file .default.text').text(firstFileName) - $('#choice_file input[name="ModelFile"]').val(firstFileName) + $('#choice_file input[name="modelFile"]').val(firstFileName) }) } @@ -550,19 +550,19 @@ n_length = versionList.length let train_html='' for (let i=0;i${versionList[i].Version}
` + train_html += `
${versionList[i].version}
` train_html += '
' } $("#model-version").append(train_html) - $('#choice_version .default.text').text(versionList[0].Version) - $('#choice_version input[name="ModelVersion"]').val(versionList[0].ID) - loadModelFile(versionList[0].ID); + $('#choice_version .default.text').text(versionList[0].version) + $('#choice_version input[name="modelVersion"]').val(versionList[0].id) + loadModelFile(versionList[0].id); } setEngineValue(value); } function setEngineValue(value){ - $('#SrcEngine').dropdown('clear'); + $('#srcEngine').dropdown('clear'); console.log("setEngineValue value=" + value); let html = "" html +=""; @@ -570,7 +570,8 @@ html +=""; html +=""; html +=""; - $('#SrcEngine').html(html); + + $('#srcEngine').html(html); srcEngineChanged(); } function getSelected(engineOption, modelName){ @@ -580,13 +581,13 @@ let nameMap = modelData.nameMap let versionList = nameMap[modelName] if(versionList != null && versionList.length >0){ - if(versionList[0].Engine == engineOption){ + if(versionList[0].engine == engineOption){ return "selected=\"selected\""; }else{ - if((versionList[0].Engine==122 || versionList[0].Engine==37) && engineOption==2){ + if((versionList[0].engine==122 || versionList[0].engine==37) && engineOption==2){ return "selected=\"selected\""; } - if((versionList[0].Engine==121 || versionList[0].Engine==38) && engineOption==1){ + if((versionList[0].engine==121 || versionList[0].engine==38) && engineOption==1){ return "selected=\"selected\""; } } diff --git a/templates/repo/modelmanage/index.tmpl b/templates/repo/modelmanage/index.tmpl index 062e1908f..6a42d96f7 100644 --- a/templates/repo/modelmanage/index.tmpl +++ b/templates/repo/modelmanage/index.tmpl @@ -138,20 +138,20 @@
- +
- +