diff --git a/models/action.go b/models/action.go index e2958821c..003dc1b20 100755 --- a/models/action.go +++ b/models/action.go @@ -346,11 +346,12 @@ func GetFeeds(opts GetFeedsOptions) ([]*Action, error) { return actions, nil } -func GetLast20PublicFeeds() ([]*Action, error) { +func GetLast20PublicFeeds(opTypes []int) ([]*Action, error) { cond := builder.NewCond() cond = cond.And(builder.Eq{"is_private": false}) cond = cond.And(builder.Eq{"is_deleted": false}) - + cond = cond.And(builder.Expr("user_id=act_user_id")) + cond = cond.And(builder.In("op_type", opTypes)) actions := make([]*Action, 0, 20) diff --git a/models/ai_model_manage.go b/models/ai_model_manage.go index 581b19a9c..ed696fcf0 100644 --- a/models/ai_model_manage.go +++ b/models/ai_model_manage.go @@ -36,6 +36,7 @@ type AiModelManage struct { CreatedUnix timeutil.TimeStamp `xorm:"created"` UpdatedUnix timeutil.TimeStamp `xorm:"updated"` IsCanOper bool + IsCanDelete bool } type AiModelQueryOptions struct { diff --git a/models/cloudbrain.go b/models/cloudbrain.go index efaa9ffeb..7270a9144 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -33,6 +33,7 @@ const ( JobTypeSnn4imagenet JobType = "SNN4IMAGENET" JobTypeBrainScore JobType = "BRAINSCORE" JobTypeTrain JobType = "TRAIN" + JobTypeInference JobType = "INFERENCE" //notebook ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" //免费资源创建排队中 @@ -111,7 +112,7 @@ type Cloudbrain struct { ComputeResource string //计算资源,例如npu EngineID int64 //引擎id - TrainUrl string //输出的obs路径 + TrainUrl string //输出模型的obs路径 BranchName string //分支名称 Parameters string //传给modelarts的param参数 BootFile string //启动文件 @@ -125,6 +126,12 @@ type Cloudbrain struct { EngineName string //引擎名称 TotalVersionCount int //任务的所有版本数量,包括删除的 + LabelName string //标签名称 + ModelName string //模型名称 + ModelVersion string //模型版本 + CkptName string //权重文件名称 + ResultUrl string //推理结果的obs路径 + User *User `xorm:"-"` Repo *Repository `xorm:"-"` } @@ -207,7 +214,7 @@ type CloudbrainsOptions struct { CloudbrainIDs []int64 // JobStatus CloudbrainStatus Type int - JobType string + JobTypes []string VersionName string IsLatestVersion string JobTypeNot bool @@ -644,6 +651,25 @@ type Config struct { Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` } +type CreateInferenceJobParams struct { + JobName string `json:"job_name"` + Description string `json:"job_desc"` + InfConfig InfConfig `json:"config"` + WorkspaceID string `json:"workspace_id"` +} + +type InfConfig struct { + WorkServerNum int `json:"worker_server_num"` + AppUrl string `json:"app_url"` //训练作业的代码目录 + BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 + Parameter []Parameter `json:"parameter"` + DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL + EngineID int64 `json:"engine_id"` + LogUrl string `json:"log_url"` + CreateVersion bool `json:"create_version"` + Flavor Flavor `json:"flavor"` + PoolID string `json:"pool_id"` +} type CreateTrainJobVersionParams struct { Description string `json:"job_desc"` @@ -894,14 +920,14 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { ) } - if (opts.JobType) != "" { + if len(opts.JobTypes) > 0 { if opts.JobTypeNot { cond = cond.And( - builder.Neq{"cloudbrain.job_type": opts.JobType}, + builder.NotIn("cloudbrain.job_type", opts.JobTypes), ) } else { cond = cond.And( - builder.Eq{"cloudbrain.job_type": opts.JobType}, + builder.In("cloudbrain.job_type", opts.JobTypes), ) } } @@ -978,6 +1004,7 @@ func QueryModelTrainJobList(repoId int64) ([]*CloudbrainInfo, int, error) { cond = cond.And( builder.Eq{"job_type": "TRAIN"}, ) + cloudbrains := make([]*CloudbrainInfo, 0) if err := sess.Select("job_id,job_name").Table(&Cloudbrain{}).Where(cond).OrderBy("created_unix DESC"). Find(&cloudbrains); err != nil { @@ -1025,9 +1052,9 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e ) } - if (opts.JobType) != "" { + if len(opts.JobTypes) > 0 { cond = cond.And( - builder.Eq{"cloudbrain.job_type": opts.JobType}, + builder.In("cloudbrain.job_type", opts.JobTypes), ) } @@ -1211,6 +1238,22 @@ func GetCloudbrainTrainJobCountByUserID(userID int64) (int, error) { return int(count), err } +func GetCloudbrainInferenceJobCountByUserID(userID int64) (int, error) { + count, err := x.In("status", ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted). + And("job_type = ? and user_id = ? and type = ?", JobTypeInference, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) + return int(count), err +} + +func UpdateInferenceJob(job *Cloudbrain) error { + return updateInferenceJob(x, job) +} + +func updateInferenceJob(e Engine, job *Cloudbrain) error { + var sess *xorm.Session + sess = e.Where("job_id = ?", job.JobID) + _, err := sess.Cols("status", "train_job_duration").Update(job) + return err +} func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { sess := x.NewSession() defer sess.Close() diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 59f72696e..821cd72f8 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -45,6 +45,30 @@ type CreateModelArtsTrainJobForm struct { EngineName string `form:"engine_names" binding:"Required"` } +type CreateModelArtsInferenceJobForm struct { + JobName string `form:"job_name" binding:"Required"` + Attachment string `form:"attachment" binding:"Required"` + BootFile string `form:"boot_file" binding:"Required"` + WorkServerNumber int `form:"work_server_number" binding:"Required"` + EngineID int `form:"engine_id" binding:"Required"` + PoolID string `form:"pool_id" binding:"Required"` + Flavor string `form:"flavor" binding:"Required"` + Params string `form:"run_para_list" binding:"Required"` + Description string `form:"description"` + IsSaveParam string `form:"is_save_para"` + ParameterTemplateName string `form:"parameter_template_name"` + PrameterDescription string `form:"parameter_description"` + BranchName string `form:"branch_name" binding:"Required"` + VersionName string `form:"version_name" binding:"Required"` + FlavorName string `form:"flaver_names" binding:"Required"` + EngineName string `form:"engine_names" binding:"Required"` + LabelName string `form:"label_names" binding:"Required"` + TrainUrl string `form:"train_url" binding:"Required"` + ModelName string `form:"model_name" binding:"Required"` + ModelVersion string `form:"model_version" binding:"Required"` + CkptName string `form:"ckpt_name" binding:"Required"` +} + func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { return validate(errs, ctx.Data, f, ctx.Locale) } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 3f7ebfd91..8af2a93e5 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -38,6 +38,7 @@ const ( // "]}" CodePath = "/code/" OutputPath = "/output/" + ResultPath = "/result/" LogPath = "/log/" JobPath = "/job/" OrderDesc = "desc" //向下查询 @@ -45,6 +46,8 @@ const ( Lines = 500 TrainUrl = "train_url" DataUrl = "data_url" + ResultUrl = "result_url" + CkptUrl = "ckpt_url" PerPage = 10 IsLatestVersion = "1" NotLatestVersion = "0" @@ -113,6 +116,36 @@ type GenerateTrainJobVersionReq struct { TotalVersionCount int } +type GenerateInferenceJobReq struct { + JobName string + Uuid string + Description string + CodeObsPath string + BootFile string + BootFileUrl string + DataUrl string + TrainUrl string + FlavorCode string + LogUrl string + PoolID string + WorkServerNumber int + EngineID int64 + Parameters []models.Parameter + CommitID string + Params string + BranchName string + FlavorName string + EngineName string + LabelName string + IsLatestVersion string + VersionCount int + TotalVersionCount int + ModelName string + ModelVersion string + CkptName string + ResultUrl string +} + type VersionInfo struct { Version []struct { ID int `json:"id"` @@ -329,12 +362,14 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job return err } + var jobTypes []string + jobTypes = append(jobTypes, string(models.JobTypeTrain)) repo := ctx.Repo.Repository VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ - RepoID: repo.ID, - Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), - JobID: strconv.FormatInt(jobResult.JobID, 10), + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobTypes: jobTypes, + JobID: strconv.FormatInt(jobResult.JobID, 10), }) if err != nil { ctx.ServerError("Cloudbrain", err) @@ -441,8 +476,82 @@ func TransTrainJobStatus(status int) string { } } -func GetVersionOutputPathByTotalVersionCount(TotalVersionCount int) (VersionOutputPath string) { +func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) { talVersionCountToString := fmt.Sprintf("%04d", TotalVersionCount) VersionOutputPath = "V" + talVersionCountToString return VersionOutputPath } + +func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) { + jobResult, err := createInferenceJob(models.CreateInferenceJobParams{ + JobName: req.JobName, + Description: req.Description, + InfConfig: models.InfConfig{ + WorkServerNum: req.WorkServerNumber, + AppUrl: req.CodeObsPath, + BootFileUrl: req.BootFileUrl, + DataUrl: req.DataUrl, + EngineID: req.EngineID, + // TrainUrl: req.TrainUrl, + LogUrl: req.LogUrl, + PoolID: req.PoolID, + CreateVersion: true, + Flavor: models.Flavor{ + Code: req.FlavorCode, + }, + Parameter: req.Parameters, + }, + }) + if err != nil { + log.Error("CreateJob failed: %v", err.Error()) + return err + } + + attach, err := models.GetAttachmentByUUID(req.Uuid) + if err != nil { + log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) + return err + } + + err = models.CreateCloudbrain(&models.Cloudbrain{ + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: strconv.FormatInt(jobResult.JobID, 10), + JobName: req.JobName, + JobType: string(models.JobTypeInference), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: attach.Name, + CommitID: req.CommitID, + EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + LogUrl: req.LogUrl, + FlavorCode: req.FlavorCode, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + FlavorName: req.FlavorName, + EngineName: req.EngineName, + LabelName: req.LabelName, + IsLatestVersion: req.IsLatestVersion, + VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + CkptName: req.CkptName, + ResultUrl: req.ResultUrl, + }) + + if err != nil { + log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) + return err + } + + return nil +} diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go index 07f26ceb7..d102dca71 100755 --- a/modules/modelarts/resty.go +++ b/modules/modelarts/resty.go @@ -874,3 +874,59 @@ sendjob: return &result, nil } + +func createInferenceJob(createJobParams models.CreateInferenceJobParams) (*models.CreateTrainJobResult, error) { + checkSetting() + client := getRestyClient() + var result models.CreateTrainJobResult + + retry := 0 + +sendjob: + res, err := client.R(). + SetHeader("Content-Type", "application/json"). + SetAuthToken(TOKEN). + SetBody(createJobParams). + SetResult(&result). + Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob) + + if err != nil { + return nil, fmt.Errorf("resty create inference-job: %s", err) + } + + req, _ := json.Marshal(createJobParams) + log.Info("%s", req) + + if res.StatusCode() == http.StatusUnauthorized && retry < 1 { + retry++ + _ = getToken() + goto sendjob + } + + if res.StatusCode() != http.StatusOK { + var temp models.ErrorResult + if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { + log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) + return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) + } + log.Error("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + BootFileErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.BootFileUrl + "'." + DataSetErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.DataUrl + "'." + if temp.ErrorMsg == BootFileErrorMsg { + log.Error("启动文件错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + return &result, fmt.Errorf("启动文件错误!") + } + if temp.ErrorMsg == DataSetErrorMsg { + log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + return &result, fmt.Errorf("数据集错误!") + } + return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + } + + if !result.IsSuccess { + log.Error("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) + return &result, fmt.Errorf("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) + } + + return &result, nil +} diff --git a/modules/storage/obs.go b/modules/storage/obs.go index 367ffe1e8..a68bb7771 100755 --- a/modules/storage/obs.go +++ b/modules/storage/obs.go @@ -28,6 +28,13 @@ type FileInfo struct { ParenDir string `json:"ParenDir"` UUID string `json:"UUID"` } +type FileInfoList []FileInfo + +func (ulist FileInfoList) Swap(i, j int) { ulist[i], ulist[j] = ulist[j], ulist[i] } +func (ulist FileInfoList) Len() int { return len(ulist) } +func (ulist FileInfoList) Less(i, j int) bool { + return strings.Compare(ulist[i].FileName, ulist[j].FileName) > 0 +} //check if has the object func ObsHasObject(path string) (bool, error) { @@ -333,7 +340,8 @@ func GetAllObjectByBucketAndPrefix(bucket string, prefix string) ([]FileInfo, er input.MaxKeys = 100 input.Prefix = prefix index := 1 - fileInfos := make([]FileInfo, 0) + fileInfoList := FileInfoList{} + prefixLen := len(prefix) log.Info("prefix=" + input.Prefix) for { @@ -358,7 +366,7 @@ func GetAllObjectByBucketAndPrefix(bucket string, prefix string) ([]FileInfo, er IsDir: isDir, ParenDir: "", } - fileInfos = append(fileInfos, fileInfo) + fileInfoList = append(fileInfoList, fileInfo) } if output.IsTruncated { input.Marker = output.NextMarker @@ -373,13 +381,14 @@ func GetAllObjectByBucketAndPrefix(bucket string, prefix string) ([]FileInfo, er return nil, err } } - return fileInfos, nil + sort.Sort(fileInfoList) + return fileInfoList, nil } -func GetObsListObject(jobName, parentDir, versionName string) ([]FileInfo, error) { +func GetObsListObject(jobName, outPutPath, parentDir, versionName string) ([]FileInfo, error) { input := &obs.ListObjectsInput{} input.Bucket = setting.Bucket - input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, versionName, parentDir), "/") + input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, outPutPath, versionName, parentDir), "/") strPrefix := strings.Split(input.Prefix, "/") output, err := ObsCli.ListObjects(input) fileInfos := make([]FileInfo, 0) @@ -401,7 +410,7 @@ func GetObsListObject(jobName, parentDir, versionName string) ([]FileInfo, error nextParentDir = parentDir + "/" + fileName } - if fileName == strPrefix[len(strPrefix)-1] || (fileName+"/") == setting.OutPutPath { + if fileName == strPrefix[len(strPrefix)-1] || (fileName+"/") == outPutPath { continue } } else { diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index d07e048bf..26fb35ed5 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -221,42 +221,42 @@ issues.in_your_repos = In your repositories contributors = Contributors page_title=Explore Better AI -page_small_title=OpenI AI development cooperation platform +page_small_title=OpenI AI Development Cooperation Platform page_description=The one-stop collaborative development environment for AI field provides AI development pipeline integrating code development, data management, model debugging, reasoning and evaluation page_use=Use Now -page_only_dynamic=Show only open source project dynamics -page_recommend_org=Recommended organization -page_recommend_org_desc=These excellent organizations are using Qizhi AI to develop collaboration platforms; Your organization also wants to show here, -page_recommend_org_commit=Click here to submit -page_recommend_org_more=More organizations -page_recommend_repo=Recommended projects -page_recommend_repo_desc=Excellent AI project recommendation; Your project also wants to show here, -page_recommend_repo_commit=Click here to submit -page_recommend_repo_go=. Click here -page_recommend_repo_more=Project Square -page_dev_env=Collaborative development environment -page_dev_env_desc=The biggest difference between Qizhi AI collaborative development platform and traditional git platform is that it provides a collaborative development environment for AI development -page_dev_env_desc_title=Unified management of development elements -page_dev_env_desc_desc=The platform provides four elements of AI development: unified management of model code, data set, model and execution environment -page_dev_env_desc1_title=Data collaboration and sharing -page_dev_env_desc1_desc=By uploading data sets in the project, many project members cooperate to complete data preprocessing; You can also establish a better model with community developers by setting the data as a public dataset -page_dev_env_desc2_title=Model management and sharing -page_dev_env_desc2_desc=Associate the model with the code version, adjust the model in different ways based on the code history version, and save the results; The trained model can be open and shared, so that more people can use the model to test and give feedback -page_dev_env_desc3_title=One configuration, multiple use -page_dev_env_desc3_desc=Provide execution environment sharing, one-time configuration and multiple use, reduce the threshold of model development, and avoid spending repeated time configuring complex environments -page_dev_yunlao=PengCheng Cloudbrain open source collaboration -page_dev_yunlao_desc1=The platform has been connected with Pengcheng Cloudbrain and can use the rich computing resources of Pengcheng Cloudbrain to complete AI development tasks -page_dev_yunlao_desc2=Pengcheng Cloudbrain's existing AI computing power is 100p FLOPS@FP16 (billions of half precision floating-point calculations per second), the main hardware infrastructure is composed of GPU server equipped with NVIDIA Tesla V100 and Atlas 900 AI cluster equipped with Kunpeng and shengteng processors -page_dev_yunlao_desc3=Developers can freely choose the corresponding computing resources according to the use requirements, and can test the adaptability, performance and stability of the model in different hardware environments -page_dev_yunlao_desc4=If your model needs more computing resources, you can also apply separately -page_dev_yunlao_apply=Separate apply +page_only_dynamic=Only show the dynamics of open source projects +page_recommend_org=Recommended Organization +page_recommend_org_desc=These excellent organizations are using the OpenI AI Collaboration Platform for collaborative development of projects. To show your organization here, +page_recommend_org_commit=Click here to submit. +page_recommend_org_more=More Organizations +page_recommend_repo=Recommended Projects +page_recommend_repo_desc=Excellent AI projects recommendation. To show your project here, +page_recommend_repo_commit=Click here to submit. +page_recommend_repo_go=Click here to +page_recommend_repo_more=explore more projects. +page_dev_env=Collaborative Development Environment +page_dev_env_desc=Provide a collaborative development environment for AI development, which is the biggest highlight that distinguishes the OpenI AI Collaboration Platform from other traditional Git platforms. +page_dev_env_desc_title=Unified Management of Development Elements +page_dev_env_desc_desc=The platform provides four elements of AI development: unified management of model code, data set, model and execution environment. +page_dev_env_desc1_title=Data Collaboration and Sharing +page_dev_env_desc1_desc=By uploading data sets in the project, many project members cooperate to complete data preprocessing. You can also establish a better model with community developers by setting the data as a public dataset. +page_dev_env_desc2_title=Model Management and Sharing +page_dev_env_desc2_desc=Associate the model with the code version, you can adjust the model in different ways based on the historical version of the code and save the results. The trained model can be open and shared, so that more people can use the model to test and give feedback. +page_dev_env_desc3_title=Once Configuration, Multiple Reuse +page_dev_env_desc3_desc=Provide execution environment sharing, Once Configuration, Multiple Reuse. Lower the threshold of model development, and avoid spending repetitive time configuring complex environments. +page_dev_yunlao=PengCheng Cloudbrain Open Source Collaboration +page_dev_yunlao_desc1=The platform has been connected with Pengcheng Cloudbrain and can use the rich computing resources of Pengcheng Cloudbrain to complete AI development tasks. +page_dev_yunlao_desc2=Pengcheng Cloudbrain's existing AI computing power is 100p FLOPS@FP16 (billions of half precision floating-point calculations per second), the main hardware infrastructure is composed of GPU server equipped with NVIDIA Tesla V100 and Atlas 900 AI cluster equipped with Kunpeng and Ascend processors. +page_dev_yunlao_desc3=Developers can freely choose the corresponding computing resources according to their needs, and can test the adaptability, performance, stability of the model in different hardware environments. +page_dev_yunlao_desc4=If your model requires more computing resources, you can also apply for it separately. +page_dev_yunlao_apply=Apply Separately [explore] repos = Repositories select_repos = Select the project users = Users organizations = Organizations -images = CloudImages +images = Cloudbrain Mirror search = Search code = Code repo_no_results = No matching repositories found. @@ -630,7 +630,7 @@ oauth2_application_create_description = OAuth2 applications gives your third-par oauth2_application_remove_description = Removing an OAuth2 application will prevent it to access authorized user accounts on this instance. Continue? authorized_oauth2_applications = Authorized OAuth2 Applications -authorized_oauth2_applications_description = You've granted access to your personal openi account to these third party applications. Please revoke access for applications no longer needed. +authorized_oauth2_applications_description = You have granted access to your personal openi account to these third party applications. Please revoke access for applications no longer needed. revoke_key = Revoke revoke_oauth2_grant = Revoke Access revoke_oauth2_grant_description = Revoking access for this third party application will prevent this application from accessing your data. Are you sure? @@ -869,6 +869,7 @@ modelarts.notebook=Debug Task modelarts.train_job=Train Task modelarts.train_job.new_debug= New Debug Task modelarts.train_job.new_train=New Train Task +modelarts.train_job.new_infer=New Inference Task modelarts.train_job.config=Configuration information modelarts.train_job.new=New train Task modelarts.train_job.new_place=The description should not exceed 256 characters @@ -882,6 +883,8 @@ modelarts.parent_version=Parent Version modelarts.run_version=Run Version modelarts.train_job.compute_node=Compute Node modelarts.create_model = Create Model +modelarts.model_label=Model Label +modelarts.infer_dataset = Inference Dataset modelarts.train_job.basic_info=Basic Info @@ -928,6 +931,13 @@ modelarts.train_job_para_admin=train_job_para_admin modelarts.train_job_para.edit=train_job_para.edit modelarts.train_job_para.connfirm=train_job_para.connfirm +modelarts.infer_job_model = Model +modelarts.infer_job_model_file = Model File +modelarts.infer_job = Inference Job +modelarts.infer_job.model_version = Model/Version +modelarts.infer_job.select_model = Select Model +modelarts.infer_job.tooltip = The model has been deleted and cannot be viewed. + model.manage.import_new_model=Import New Model model.manage.create_error=Equal Name and Version has existed. model.manage.model_name = Model Name @@ -2687,6 +2697,7 @@ error.unit_not_allowed = You are not allowed to access this repository section. head.community = Community head.project = Repositories head.openi = OpenI +head.openi.repo = OpenI Projects head.dataset = Datasets foot.council = Council foot.technical_committee = Technical Committee diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 6dd44d848..874e4446b 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -831,7 +831,7 @@ debug=调试 debug_again=再次调试 stop=停止 delete=删除 -model_download=模型下载 +model_download=结果下载 submit_image=提交镜像 download=模型下载 @@ -876,9 +876,10 @@ modelarts.notebook=调试任务 modelarts.train_job=训练任务 modelarts.train_job.new_debug=新建调试任务 modelarts.train_job.new_train=新建训练任务 +modelarts.train_job.new_infer=新建推理任务 modelarts.train_job.config=配置信息 modelarts.train_job.new=新建训练任务 -modelarts.train_job.new_place=描述字数不超过256个字符 +modelarts.train_job.new_place=描述字数不超过255个字符 modelarts.model_name=模型名称 modelarts.model_size=模型大小 modelarts.import_model=导入模型 @@ -888,6 +889,8 @@ modelarts.current_version=当前版本 modelarts.parent_version=父版本 modelarts.run_version=运行版本 modelarts.create_model=创建模型 +modelarts.model_label=模型标签 +modelarts.infer_dataset = 推理数据集 @@ -929,7 +932,7 @@ modelarts.train_job.NAS_mount_path=NAS挂载路径 modelarts.train_job.query_whether_save_parameter=保存作业参数 modelarts.train_job.save_helper=保存当前作业的配置参数,后续您可以使用已保存的配置参数快速创建训练作业。 modelarts.train_job.common_frame=常用框架 -modelarts.train_job.amount_of_compute_node=计算节点个数 +modelarts.train_job.amount_of_compute_node=计算节点数 modelarts.train_job.job_parameter_name=任务参数名称 modelarts.train_job.parameter_description=任务参数描述 modelarts.log=日志 @@ -939,6 +942,14 @@ modelarts.train_job_para_admin=任务参数管理 modelarts.train_job_para.edit=编辑 modelarts.train_job_para.connfirm=确定 +modelarts.infer_job_model = 模型名称 +modelarts.infer_job_model_file = 模型文件 +modelarts.infer_job = 推理任务 +modelarts.infer_job.model_version = 模型/版本 +modelarts.infer_job.select_model = 选择模型 +modelarts.infer_job.boot_file_helper=启动文件是您程序执行的入口文件,必须是以.py结尾的文件。比如inference.py、main.py、example/inference.py、case/main.py。 +modelarts.infer_job.tooltip = 该模型已删除,无法查看。 + model.manage.import_new_model=导入新模型 model.manage.create_error=相同的名称和版本的模型已经存在。 model.manage.model_name = 模型名称 diff --git a/public/home/home.js b/public/home/home.js index f38e6f18e..e3b0387d4 100644 --- a/public/home/home.js +++ b/public/home/home.js @@ -42,10 +42,10 @@ if(document.location.host == "git.openi.org.cn" || document.URL.startsWith("http var socket = new WebSocket(url); socket.onopen = function () { + messageQueue = []; console.log("message has connected."); }; -var messageQueue = []; var maxSize = 20; var html =document.documentElement; var lang = html.attributes["lang"] @@ -165,12 +165,12 @@ function getTime(UpdatedUnix,currentTime){ var seconds= leave3; if(hours == 0 && minutes == 0){ - return seconds + getRepoOrOrg(6,isZh); + return seconds + getRepoOrOrg(6,isZh,seconds); }else{ if(hours > 0){ - return hours + getRepoOrOrg(4,isZh); + return hours + getRepoOrOrg(4,isZh,hours); }else{ - return minutes + getRepoOrOrg(5,isZh); + return minutes + getRepoOrOrg(5,isZh,minutes); } } } @@ -239,7 +239,7 @@ var actionNameZH={ "5":"推送了 {branch} 分支的代码到", "6":"创建了任务", "7":"创建了合并请求", - "9":"推送了 {branch} 分支的代码到", + "9":"推送了标签 {branch} 到", "10":"评论了任务", "11":"合并了合并请求", "12":"关闭了任务", @@ -247,7 +247,7 @@ var actionNameZH={ "14":"关闭了合并请求", "15":"重新开启了合并请求", "17":"从 {repoName} 删除分支 {deleteBranchName}", - "22":"拒绝了合并请求", + "22":"建议变更", "23":"评论了合并请求" }; @@ -257,7 +257,7 @@ var actionNameEN={ "5":" pushed to {branch} at", "6":" opened issue", "7":" created pull request", - "9":" pushed to {branch} at", + "9":" pushed tag {branch} to ", "10":" commented on issue", "11":" merged pull request", "12":" closed issue", @@ -265,7 +265,7 @@ var actionNameEN={ "14":" closed pull request", "15":" reopened pull request", "17":" deleted branch {deleteBranchName} from {repoName}", - "22":" rejected pull request", + "22":" proposed changes", "23":" commented on pull request" }; @@ -273,18 +273,30 @@ var repoAndOrgZH={ "1":"项目", "2":"成员", "3":"团队", + "11":"项目", + "21":"成员", + "31":"团队", "4":"小时前", "5":"分钟前", - "6":"秒前" + "6":"秒前", + "41":"小时前", + "51":"分钟前", + "61":"秒前" }; var repoAndOrgEN={ - "1":"repository", - "2":"Members ", - "3":"Teams", - "4":" hours ago", - "5":" minutes ago", - "6":" seconds ago" + "1":"Repository", + "2":"Member ", + "3":"Team", + "11":"Repositorys", + "22":"Members ", + "31":"Teams", + "4":" hour ago", + "5":" minute ago", + "6":" second ago", + "41":" hours ago", + "51":" minutes ago", + "61":" seconds ago" }; @@ -415,7 +427,10 @@ function displayRepo(json){ //var repoAndOrgEN = new Map([['1', "Repository"], ['2', "Members"], ['3', "Teams"]]); -function getRepoOrOrg(key,isZhLang){ +function getRepoOrOrg(key,isZhLang,numbers=1){ + if(numbers > 1){ + key+="1"; + } if(isZhLang){ return repoAndOrgZH[key]; }else{ @@ -436,7 +451,7 @@ function displayOrg(json){ html += " "; html += "
"; html += " " + record["Name"] + " " + record["FullName"]; - html += "
" + record["NumRepos"] +" " + getRepoOrOrg(1,isZh) + " ・ " + record["NumMembers"] +" " + getRepoOrOrg(2,isZh) + " ・ " + record["NumTeams"] + " " + getRepoOrOrg(3,isZh) + "
"; + html += "
" + record["NumRepos"] +" " + getRepoOrOrg(1,isZh,record["NumRepos"]) + " ・ " + record["NumMembers"] +" " + getRepoOrOrg(2,isZh,record["NumMembers"]) + " ・ " + record["NumTeams"] + " " + getRepoOrOrg(3,isZh,record["NumTeams"]) + "
"; html += "
"; html += " "; html += " "; diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index dcea46ed6..8b87110c9 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -892,6 +892,15 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/model_list", repo.ModelList) }) }) + m.Group("/inference-job", func() { + m.Group("/:jobid", func() { + m.Get("", repo.GetModelArtsInferenceJob) + m.Get("/log", repo.TrainJobGetLog) + m.Post("/del_version", repo.DelTrainJobVersion) + m.Post("/stop_version", repo.StopTrainJobVersion) + m.Get("/result_list", repo.ResultList) + }) + }) }, reqRepoReader(models.UnitTypeCloudBrain)) }, repoAssignment()) }) diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 05c31b5f5..679d05305 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -133,7 +133,6 @@ func TrainJobGetLog(ctx *context.APIContext) { var jobID = ctx.Params(":jobid") var versionName = ctx.Query("version_name") - // var logFileName = ctx.Query("file_name") var baseLine = ctx.Query("base_line") var order = ctx.Query("order") var lines = ctx.Query("lines") @@ -222,12 +221,14 @@ func DelTrainJobVersion(ctx *context.APIContext) { } //获取删除后的版本数量 + var jobTypes []string + jobTypes = append(jobTypes, string(models.JobTypeTrain)) repo := ctx.Repo.Repository VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ - RepoID: repo.ID, - Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), - JobID: jobID, + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobTypes: jobTypes, + JobID: jobID, }) if err != nil { ctx.ServerError("get VersionListCount failed", err) @@ -299,7 +300,80 @@ func ModelList(ctx *context.APIContext) { log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) return } - models, err := storage.GetObsListObject(task.JobName, parentDir, versionName) + models, err := storage.GetObsListObject(task.JobName, "output/", parentDir, versionName) + if err != nil { + log.Info("get TrainJobListModel failed:", err) + ctx.ServerError("GetObsListObject:", err) + return + } + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobID": jobID, + "VersionName": versionName, + "StatusOK": 0, + "Path": dirArray, + "Dirs": models, + "task": task, + "PageIsCloudBrain": true, + }) +} + +func GetModelArtsInferenceJob(ctx *context.APIContext) { + var ( + err error + ) + + jobID := ctx.Params(":jobid") + job, err := models.GetCloudbrainByJobID(jobID) + if err != nil { + ctx.NotFound(err) + return + } + result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) + if err != nil { + ctx.NotFound(err) + return + } + + job.Status = modelarts.TransTrainJobStatus(result.IntStatus) + job.Duration = result.Duration + job.TrainJobDuration = result.TrainJobDuration + + if result.Duration != 0 { + job.TrainJobDuration = util.AddZero(result.Duration/3600000) + ":" + util.AddZero(result.Duration%3600000/60000) + ":" + util.AddZero(result.Duration%60000/1000) + + } else { + job.TrainJobDuration = "00:00:00" + } + + err = models.UpdateInferenceJob(job) + if err != nil { + log.Error("UpdateJob failed:", err) + } + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobID": jobID, + "JobStatus": job.Status, + "JobDuration": job.TrainJobDuration, + }) + +} + +func ResultList(ctx *context.APIContext) { + var ( + err error + ) + + var jobID = ctx.Params(":jobid") + var versionName = ctx.Query("version_name") + parentDir := ctx.Query("parentDir") + dirArray := strings.Split(parentDir, "/") + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + models, err := storage.GetObsListObject(task.JobName, "result/", parentDir, versionName) if err != nil { log.Info("get TrainJobListModel failed:", err) ctx.ServerError("GetObsListObject:", err) diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 845dbbc6b..fe4d9794c 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -146,7 +146,8 @@ func SaveModel(ctx *context.Context) { if !trainTaskCreate { if !ctx.Repo.CanWrite(models.UnitTypeModelManage) { - ctx.ServerError("No right.", errors.New(ctx.Tr("repo.model_noright"))) + //ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + ctx.JSON(403, ctx.Tr("repo.model_noright")) return } } @@ -209,20 +210,11 @@ func DeleteModel(ctx *context.Context) { }) } } -func isCanDeleteOrDownload(ctx *context.Context, model *models.AiModelManage) bool { - if ctx.User.IsAdmin || ctx.User.ID == model.UserId { - return true - } - if ctx.Repo.IsOwner() { - return true - } - return false -} func deleteModelByID(ctx *context.Context, id string) error { log.Info("delete model start. id=" + id) model, err := models.QueryModelById(id) - if !isCanDeleteOrDownload(ctx, model) { + if !isCanDelete(ctx, model.UserId) { return errors.New(ctx.Tr("repo.model_noright")) } if err == nil { @@ -278,8 +270,8 @@ func DownloadMultiModelFile(ctx *context.Context) { ctx.ServerError("no such model:", err) return } - if !isCanDeleteOrDownload(ctx, task) { - ctx.ServerError("no right.", errors.New(ctx.Tr("repo.model_noright"))) + if !isOper(ctx, task.UserId) { + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } @@ -371,7 +363,16 @@ func DownloadSingleModelFile(ctx *context.Context) { parentDir := ctx.Query("parentDir") fileName := ctx.Query("fileName") path := Model_prefix + models.AttachmentRelativePath(id) + "/" + parentDir + fileName - + task, err := models.QueryModelById(id) + if err != nil { + log.Error("no such model!", err.Error()) + ctx.ServerError("no such model:", err) + return + } + if !isOper(ctx, task.UserId) { + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + return + } if setting.PROXYURL != "" { body, err := storage.ObsDownloadAFile(setting.Bucket, path) if err != nil { @@ -414,6 +415,8 @@ func ShowModelInfo(ctx *context.Context) { ctx.Data["ID"] = ctx.Query("ID") ctx.Data["name"] = ctx.Query("name") ctx.Data["isModelManage"] = true + ctx.Data["ModelManageAccess"] = ctx.Repo.CanWrite(models.UnitTypeModelManage) + ctx.HTML(200, tplModelInfo) } @@ -426,6 +429,7 @@ func ShowSingleModel(ctx *context.Context) { userIds := make([]int64, len(models)) for i, model := range models { model.IsCanOper = isOper(ctx, model.UserId) + model.IsCanDelete = isCanDelete(ctx, model.UserId) userIds[i] = model.UserId } userNameMap := queryUserName(userIds) @@ -468,6 +472,7 @@ func ShowOneVersionOtherModel(ctx *context.Context) { userIds := make([]int64, len(aimodels)) for i, model := range aimodels { model.IsCanOper = isOper(ctx, model.UserId) + model.IsCanDelete = isCanDelete(ctx, model.UserId) userIds[i] = model.UserId } userNameMap := queryUserName(userIds) @@ -487,8 +492,7 @@ func ShowOneVersionOtherModel(ctx *context.Context) { } } -func ShowModelTemplate(ctx *context.Context) { - ctx.Data["isModelManage"] = true +func SetModelCount(ctx *context.Context) { repoId := ctx.Repo.Repository.ID Type := -1 _, count, _ := models.QueryModel(&models.AiModelQueryOptions{ @@ -501,10 +505,15 @@ func ShowModelTemplate(ctx *context.Context) { New: MODEL_LATEST, }) ctx.Data["MODEL_COUNT"] = count +} +func ShowModelTemplate(ctx *context.Context) { + ctx.Data["isModelManage"] = true + repoId := ctx.Repo.Repository.ID + SetModelCount(ctx) + ctx.Data["ModelManageAccess"] = ctx.Repo.CanWrite(models.UnitTypeModelManage) _, trainCount, _ := models.QueryModelTrainJobList(repoId) log.Info("query train count=" + fmt.Sprint(trainCount)) - ctx.Data["TRAIN_COUNT"] = trainCount ctx.HTML(200, tplModelManageIndex) } @@ -520,11 +529,24 @@ func isQueryRight(ctx *context.Context) bool { } } +func isCanDelete(ctx *context.Context, modelUserId int64) bool { + if ctx.User == nil { + return false + } + if ctx.User.IsAdmin || ctx.User.ID == modelUserId { + return true + } + if ctx.Repo.IsOwner() { + return true + } + return false +} + func isOper(ctx *context.Context, modelUserId int64) bool { if ctx.User == nil { return false } - if ctx.User.IsAdmin || ctx.Repo.IsOwner() || ctx.User.ID == modelUserId { + if ctx.User.IsAdmin || ctx.User.ID == modelUserId { return true } return false @@ -533,7 +555,7 @@ func isOper(ctx *context.Context, modelUserId int64) bool { func ShowModelPageInfo(ctx *context.Context) { log.Info("ShowModelInfo start.") if !isQueryRight(ctx) { - ctx.ServerError("no right.", errors.New(ctx.Tr("repo.model_noright"))) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } page := ctx.QueryInt("page") @@ -563,6 +585,7 @@ func ShowModelPageInfo(ctx *context.Context) { userIds := make([]int64, len(modelResult)) for i, model := range modelResult { model.IsCanOper = isOper(ctx, model.UserId) + model.IsCanDelete = isCanDelete(ctx, model.UserId) userIds[i] = model.UserId } @@ -603,8 +626,9 @@ func ModifyModelInfo(ctx *context.Context) { ctx.ServerError("no such model:", err) return } - if !isCanDeleteOrDownload(ctx, task) { - ctx.ServerError("no right.", errors.New(ctx.Tr("repo.model_noright"))) + if !isOper(ctx, task.UserId) { + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + //ctx.ServerError("no right.", errors.New(ctx.Tr("repo.model_noright"))) return } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 7a952394e..0198856a5 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1,15 +1,18 @@ package repo import ( + "archive/zip" "encoding/json" "errors" "io" + "io/ioutil" "net/http" "os" "path" "strconv" "strings" "time" + "unicode/utf8" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/auth" @@ -37,6 +40,10 @@ const ( tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new" tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show" tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new" + + tplModelArtsInferenceJobIndex base.TplName = "repo/modelarts/inferencejob/index" + tplModelArtsInferenceJobNew base.TplName = "repo/modelarts/inferencejob/new" + tplModelArtsInferenceJobShow base.TplName = "repo/modelarts/inferencejob/show" ) func DebugJobIndex(ctx *context.Context) { @@ -49,12 +56,15 @@ func DebugJobIndex(ctx *context.Context) { page = 1 } debugType := modelarts.DebugType + jobTypeNot := false if debugListType == models.GPUResource { debugType = models.TypeCloudBrainOne } else if debugListType == models.NPUResource { debugType = models.TypeCloudBrainTwo } + var jobTypes []string + jobTypes = append(jobTypes, string(models.JobTypeBenchmark), string(models.JobTypeSnn4imagenet), string(models.JobTypeBrainScore), string(models.JobTypeDebug)) ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ ListOptions: models.ListOptions{ Page: page, @@ -62,8 +72,8 @@ func DebugJobIndex(ctx *context.Context) { }, RepoID: repo.ID, Type: debugType, - JobTypeNot: true, - JobType: string(models.JobTypeTrain), + JobTypeNot: jobTypeNot, + JobTypes: jobTypes, }) if err != nil { ctx.ServerError("Get debugjob faild:", err) @@ -367,6 +377,8 @@ func TrainJobIndex(ctx *context.Context) { page = 1 } + var jobTypes []string + jobTypes = append(jobTypes, string(models.JobTypeTrain)) tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ ListOptions: models.ListOptions{ Page: page, @@ -375,7 +387,7 @@ func TrainJobIndex(ctx *context.Context) { RepoID: repo.ID, Type: models.TypeCloudBrainTwo, JobTypeNot: false, - JobType: string(models.JobTypeTrain), + JobTypes: jobTypes, IsLatestVersion: modelarts.IsLatestVersion, }) if err != nil { @@ -749,7 +761,7 @@ func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrai func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { ctx.Data["PageIsTrainJob"] = true - VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(modelarts.TotalVersionCount) + VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) jobName := form.JobName uuid := form.Attachment description := form.Description @@ -794,18 +806,11 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } - // attach, err := models.GetAttachmentByUUID(uuid) - // if err != nil { - // log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) - // return - // } - //todo: del the codeLocalPath - // _, err := ioutil.ReadDir(codeLocalPath) - // if err == nil { - // os.RemoveAll(codeLocalPath) - // } - os.RemoveAll(codeLocalPath) + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } gitRepo, _ := git.OpenRepository(repo.RepoPath()) commitID, _ := gitRepo.GetBranchCommitID(branch_name) @@ -973,7 +978,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) return } - VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(latestTask.TotalVersionCount + 1) + VersionOutputPath := modelarts.GetOutputPathByCount(latestTask.TotalVersionCount + 1) jobName := form.JobName uuid := form.Attachment @@ -1011,18 +1016,17 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ return } - // attach, err := models.GetAttachmentByUUID(uuid) - // if err != nil { - // log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) - // return - // } - //todo: del the codeLocalPath - // _, err = ioutil.ReadDir(codeLocalPath) - // if err == nil { - // os.RemoveAll(codeLocalPath) - // } - os.RemoveAll(codeLocalPath) + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } else { + log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) + versionErrorDataPrepare(ctx, form) + ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) + return + } + // os.RemoveAll(codeLocalPath) gitRepo, _ := git.OpenRepository(repo.RepoPath()) commitID, _ := gitRepo.GetBranchCommitID(branch_name) @@ -1264,6 +1268,42 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error { return nil } +func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) error { + if !strings.HasSuffix(form.BootFile, ".py") { + log.Error("the boot file(%s) must be a python file", form.BootFile) + return errors.New("启动文件必须是python文件") + } + + if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 { + log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber) + return errors.New("计算节点数必须在1-25之间") + } + + if form.ModelName == "" { + log.Error("the ModelName(%d) must not be nil", form.ModelName) + return errors.New("模型名称不能为空") + } + if form.ModelVersion == "" { + log.Error("the ModelVersion(%d) must not be nil", form.ModelVersion) + return errors.New("模型版本不能为空") + } + if form.CkptName == "" { + log.Error("the CkptName(%d) must not be nil", form.CkptName) + return errors.New("权重文件不能为空") + } + if form.BranchName == "" { + log.Error("the Branch(%d) must not be nil", form.BranchName) + return errors.New("分支名不能为空") + } + + if utf8.RuneCountInString(form.Description) > 255 { + log.Error("the Description length(%d) must not more than 255", form.Description) + return errors.New("描述字符不能超过255个字符") + } + + return nil +} + func TrainJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true var jobID = ctx.Params(":jobid") @@ -1273,6 +1313,9 @@ func TrainJobShow(ctx *context.Context) { if page <= 0 { page = 1 } + + var jobTypes []string + jobTypes = append(jobTypes, string(models.JobTypeTrain)) VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ ListOptions: models.ListOptions{ Page: page, @@ -1280,7 +1323,7 @@ func TrainJobShow(ctx *context.Context) { }, RepoID: repo.ID, Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), + JobTypes: jobTypes, JobID: jobID, }) @@ -1392,10 +1435,12 @@ func TrainJobDel(ctx *context.Context) { var jobID = ctx.Params(":jobid") repo := ctx.Repo.Repository + var jobTypes []string + jobTypes = append(jobTypes, string(models.JobTypeTrain)) VersionListTasks, _, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ RepoID: repo.ID, Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), + JobTypes: jobTypes, JobID: jobID, }) if err != nil { @@ -1518,6 +1563,427 @@ func getConfigList(perPage, page int, sortBy, order, searchContent, configType s return list, nil } +func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) { + ctx.Data["PageIsTrainJob"] = true + VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) + jobName := form.JobName + uuid := form.Attachment + description := form.Description + workServerNumber := form.WorkServerNumber + engineID := form.EngineID + bootFile := form.BootFile + flavorCode := form.Flavor + params := form.Params + poolID := form.PoolID + repo := ctx.Repo.Repository + codeLocalPath := setting.JobPath + jobName + modelarts.CodePath + codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath + VersionOutputPath + "/" + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" + dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" + branch_name := form.BranchName + FlavorName := form.FlavorName + EngineName := form.EngineName + LabelName := form.LabelName + isLatestVersion := modelarts.IsLatestVersion + VersionCount := modelarts.VersionCount + trainUrl := form.TrainUrl + modelName := form.ModelName + modelVersion := form.ModelVersion + ckptName := form.CkptName + + ckptUrl := form.TrainUrl + form.CkptName + + if err := paramCheckCreateInferenceJob(form); err != nil { + log.Error("paramCheckCreateInferenceJob failed:(%v)", err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form) + return + } + + count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID) + if err != nil { + log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("system error", tplModelArtsInferenceJobNew, &form) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting inference task", ctx.Data["MsgID"]) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("you have already a running or waiting inference task, can not create more", tplModelArtsInferenceJobNew, &form) + return + } + } + + //todo: del the codeLocalPath + _, err = ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } + + gitRepo, _ := git.OpenRepository(repo.RepoPath()) + commitID, _ := gitRepo.GetBranchCommitID(branch_name) + + if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ + Branch: branch_name, + }); err != nil { + log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsInferenceJobNew, &form) + return + } + + //todo: upload code (send to file_server todo this work?) + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("Failed to obsMkdir_result", tplModelArtsInferenceJobNew, &form) + return + } + + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsInferenceJobNew, &form) + return + } + + if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { + log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsInferenceJobNew, &form) + return + } + + //todo: del local code? + var parameters models.Parameters + param := make([]models.Parameter, 0) + param = append(param, models.Parameter{ + Label: modelarts.ResultUrl, + Value: "s3:/" + resultObsPath, + }, models.Parameter{ + Label: modelarts.CkptUrl, + Value: "s3:/" + ckptUrl, + }) + if len(params) != 0 { + err := json.Unmarshal([]byte(params), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("运行参数错误", tplModelArtsInferenceJobNew, &form) + return + } + + for _, parameter := range parameters.Parameter { + if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { + param = append(param, models.Parameter{ + Label: parameter.Label, + Value: parameter.Value, + }) + } + } + } + + req := &modelarts.GenerateInferenceJobReq{ + JobName: jobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: trainUrl, + FlavorCode: flavorCode, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Uuid: uuid, + Parameters: param, //modelarts训练时用到 + CommitID: commitID, + BranchName: branch_name, + Params: form.Params, + FlavorName: FlavorName, + EngineName: EngineName, + LabelName: LabelName, + IsLatestVersion: isLatestVersion, + VersionCount: VersionCount, + TotalVersionCount: modelarts.TotalVersionCount, + ModelName: modelName, + ModelVersion: modelVersion, + CkptName: ckptName, + ResultUrl: resultObsPath, + } + + //将params转换Parameters.Parameter,出错时返回给前端 + // var Parameters modelarts.Parameters + // if err := json.Unmarshal([]byte(params), &Parameters); err != nil { + // ctx.ServerError("json.Unmarshal failed:", err) + // return + // } + + err = modelarts.GenerateInferenceJob(ctx, req) + if err != nil { + log.Error("GenerateTrainJob failed:%v", err.Error()) + inferenceJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form) + return + } + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") +} +func InferenceJobIndex(ctx *context.Context) { + MustEnableModelArts(ctx) + + repo := ctx.Repo.Repository + page := ctx.QueryInt("page") + if page <= 0 { + page = 1 + } + + var jobTypes []string + jobTypes = append(jobTypes, string(models.JobTypeInference)) + tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ + ListOptions: models.ListOptions{ + Page: page, + PageSize: setting.UI.IssuePagingNum, + }, + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobTypes: jobTypes, + }) + if err != nil { + ctx.ServerError("Cloudbrain", err) + return + } + + for i, task := range tasks { + tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) + tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) + tasks[i].ComputeResource = models.NPUResource + } + + repoId := ctx.Repo.Repository.ID + Type := -1 + _, model_count, _ := models.QueryModel(&models.AiModelQueryOptions{ + ListOptions: models.ListOptions{ + Page: 1, + PageSize: 2, + }, + RepoID: repoId, + Type: Type, + New: MODEL_LATEST, + }) + ctx.Data["MODEL_COUNT"] = model_count + + pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) + pager.SetDefaultParams(ctx) + ctx.Data["Page"] = pager + + ctx.Data["PageIsCloudBrain"] = true + ctx.Data["Tasks"] = tasks + ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx) + ctx.Data["RepoIsEmpty"] = repo.IsEmpty + ctx.HTML(200, tplModelArtsInferenceJobIndex) +} +func InferenceJobNew(ctx *context.Context) { + err := inferenceJobNewDataPrepare(ctx) + if err != nil { + ctx.ServerError("get new inference-job info failed", err) + return + } + ctx.HTML(200, tplModelArtsInferenceJobNew) +} +func inferenceJobNewDataPrepare(ctx *context.Context) error { + ctx.Data["PageIsCloudBrain"] = true + + t := time.Now() + var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["job_name"] = jobName + + attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) + if err != nil { + ctx.ServerError("GetAllUserAttachments failed:", err) + return err + } + ctx.Data["attachments"] = attachs + + var resourcePools modelarts.ResourcePool + if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["resource_pools"] = resourcePools.Info + + var engines modelarts.Engine + if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engines"] = engines.Info + + var versionInfos modelarts.VersionInfo + if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engine_versions"] = versionInfos.Version + + var flavorInfos modelarts.Flavor + if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + + ctx.Data["flavor_infos"] = flavorInfos.Info + ctx.Data["params"] = "" + ctx.Data["branchName"] = ctx.Repo.BranchName + + configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) + if err != nil { + ctx.ServerError("getConfigList failed:", err) + return err + } + ctx.Data["config_list"] = configList.ParaConfigs + + repoId := ctx.Repo.Repository.ID + Type := -1 + _, model_count, _ := models.QueryModel(&models.AiModelQueryOptions{ + ListOptions: models.ListOptions{ + Page: 1, + PageSize: 2, + }, + RepoID: repoId, + Type: Type, + New: MODEL_LATEST, + }) + ctx.Data["MODEL_COUNT"] = model_count + + return nil +} + +func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) error { + ctx.Data["PageIsCloudBrain"] = true + + t := time.Now() + var jobName = "inference" + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["job_name"] = jobName + + attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) + if err != nil { + ctx.ServerError("GetAllUserAttachments failed:", err) + return err + } + ctx.Data["attachments"] = attachs + + var resourcePools modelarts.ResourcePool + if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["resource_pools"] = resourcePools.Info + + var engines modelarts.Engine + if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engines"] = engines.Info + + var versionInfos modelarts.VersionInfo + if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["engine_versions"] = versionInfos.Version + + var flavorInfos modelarts.Flavor + if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["flavor_infos"] = flavorInfos.Info + + configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) + if err != nil { + ctx.ServerError("getConfigList failed:", err) + return err + } + var Parameters modelarts.Parameters + if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["config_list"] = configList.ParaConfigs + ctx.Data["bootFile"] = form.BootFile + ctx.Data["uuid"] = form.Attachment + ctx.Data["branch_name"] = form.BranchName + ctx.Data["model_name"] = form.ModelName + ctx.Data["model_version"] = form.ModelVersion + ctx.Data["ckpt_name"] = form.CkptName + ctx.Data["train_url"] = form.TrainUrl + + return nil +} +func InferenceJobShow(ctx *context.Context) { + ctx.Data["PageIsCloudBrain"] = true + var jobID = ctx.Params(":jobid") + + page := ctx.QueryInt("page") + if page <= 0 { + page = 1 + } + task, err := models.GetCloudbrainByJobID(jobID) + + if err != nil { + log.Error("GetInferenceTask(%s) failed:%v", jobID, err.Error()) + ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobShow, nil) + return + } + //设置权限 + canNewJob, err := canUserCreateTrainJobVersion(ctx, task.UserID) + if err != nil { + ctx.ServerError("canNewJob failed", err) + return + } + ctx.Data["canNewJob"] = canNewJob + + //将运行参数转化为epoch_size = 3, device_target = Ascend的格式 + var parameters models.Parameters + err = json.Unmarshal([]byte(task.Parameters), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) + trainJobNewDataPrepare(ctx) + return + } + + if len(parameters.Parameter) > 0 { + paramTemp := "" + for _, Parameter := range parameters.Parameter { + param := Parameter.Label + " = " + Parameter.Value + "; " + paramTemp = paramTemp + param + } + task.Parameters = paramTemp[:len(paramTemp)-2] + } else { + task.Parameters = "" + } + + LabelName := strings.Fields(task.LabelName) + ctx.Data["labelName"] = LabelName + ctx.Data["jobID"] = jobID + ctx.Data["jobName"] = task.JobName + ctx.Data["task"] = task + + tempUids := []int64{} + tempUids = append(tempUids, task.UserID) + JobCreater, err := models.GetUserNamesByIDs(tempUids) + if err != nil { + log.Error("GetUserNamesByIDs (WhitelistUserIDs): %v", err) + } + ctx.Data["userName"] = JobCreater[0] + ctx.HTML(http.StatusOK, tplModelArtsInferenceJobShow) +} + func ModelDownload(ctx *context.Context) { var ( err error @@ -1546,6 +2012,31 @@ func ModelDownload(ctx *context.Context) { http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) } +func ResultDownload(ctx *context.Context) { + var ( + err error + ) + + var jobID = ctx.Params(":jobid") + versionName := ctx.Query("version_name") + parentDir := ctx.Query("parent_dir") + fileName := ctx.Query("file_name") + log.Info("DownloadResult start.") + task, err := models.GetCloudbrainByJobID(jobID) + if err != nil { + ctx.Data["error"] = err.Error() + } + path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, "result/", versionName, parentDir, fileName), "/") + log.Info("Download path is:%s", path) + + url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) + if err != nil { + log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"]) + ctx.ServerError("GetObsCreateSignedUrl", err) + return + } + http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) +} func DeleteJobStorage(jobName string) error { //delete local localJobPath := setting.JobPath + jobName @@ -1563,3 +2054,82 @@ func DeleteJobStorage(jobName string) error { return nil } + +func DownloadMultiResultFile(ctx *context.Context) { + var jobID = ctx.Params(":jobid") + var versionName = ctx.Query("version_name") + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + // if !isCanDeleteOrDownload(ctx, task) { + // ctx.ServerError("no right.", errors.New(ctx.Tr("repo.model_noright"))) + // return + // } + + // path := Model_prefix + models.AttachmentRelativePath(id) + "/" + path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, "result/", versionName), "/") + "/" + + allFile, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, path) + if err == nil { + //count++ + // models.ModifyModelDownloadCount(id) + + returnFileName := task.JobName + ".zip" + ctx.Resp.Header().Set("Content-Disposition", "attachment; filename="+returnFileName) + ctx.Resp.Header().Set("Content-Type", "application/octet-stream") + w := zip.NewWriter(ctx.Resp) + defer w.Close() + for _, oneFile := range allFile { + if oneFile.IsDir { + log.Info("zip dir name:" + oneFile.FileName) + } else { + log.Info("zip file name:" + oneFile.FileName) + fDest, err := w.Create(oneFile.FileName) + if err != nil { + log.Info("create zip entry error, download file failed: %s\n", err.Error()) + ctx.ServerError("download file failed:", err) + return + } + body, err := storage.ObsDownloadAFile(setting.Bucket, path+oneFile.FileName) + if err != nil { + log.Info("download file failed: %s\n", err.Error()) + ctx.ServerError("download file failed:", err) + return + } else { + defer body.Close() + p := make([]byte, 1024) + var readErr error + var readCount int + // 读取对象内容 + for { + readCount, readErr = body.Read(p) + if readCount > 0 { + fDest.Write(p[:readCount]) + } + if readErr != nil { + break + } + } + } + } + } + } else { + log.Info("error,msg=" + err.Error()) + ctx.ServerError("no file to download.", err) + } +} + +func SetJobCount(ctx *context.Context) { + repoId := ctx.Repo.Repository.ID + _, jobCount, err := models.Cloudbrains(&models.CloudbrainsOptions{ + RepoID: repoId, + Type: modelarts.DebugType, + }) + if err != nil { + ctx.ServerError("Get job faild:", err) + return + } + ctx.Data["jobCount"] = jobCount +} diff --git a/routers/repo/repo_statistic.go b/routers/repo/repo_statistic.go index 11b421659..dce183f49 100755 --- a/routers/repo/repo_statistic.go +++ b/routers/repo/repo_statistic.go @@ -51,12 +51,14 @@ func RepoStatisticDaily(date string) { isInitMinMaxRadar := false + var error_projects = make([]string, 0) for _, repo := range repos { - log.Info("start statistic: %s", getDistinctProjectName(repo)) + projectName := getDistinctProjectName(repo) + log.Info("start statistic: %s", projectName) var numDevMonths, numWikiViews, numContributor, numKeyContributor, numCommitsGrowth, numCommitLinesGrowth, numContributorsGrowth, numCommits int64 repoGitStat, err := models.GetRepoKPIStats(repo) if err != nil { - log.Error("GetRepoKPIStats failed: %s", getDistinctProjectName(repo)) + log.Error("GetRepoKPIStats failed: %s", projectName) } else { numDevMonths = repoGitStat.DevelopAge numKeyContributor = repoGitStat.KeyContributors @@ -79,26 +81,26 @@ func RepoStatisticDaily(date string) { var numVersions int64 numVersions, err = models.GetReleaseCountByRepoID(repo.ID, models.FindReleasesOptions{}) if err != nil { - log.Error("GetReleaseCountByRepoID failed(%s): %v", getDistinctProjectName(repo), err) + log.Error("GetReleaseCountByRepoID failed(%s): %v", projectName, err) } var datasetSize int64 datasetSize, err = getDatasetSize(repo) if err != nil { - log.Error("getDatasetSize failed(%s): %v", getDistinctProjectName(repo), err) + log.Error("getDatasetSize failed(%s): %v", projectName, err) } var numComments int64 numComments, err = models.GetCommentCountByRepoID(repo.ID) if err != nil { - log.Error("GetCommentCountByRepoID failed(%s): %v", getDistinctProjectName(repo), err) + log.Error("GetCommentCountByRepoID failed(%s): %v", projectName, err) } beginTime, endTime := getStatTime(date) var numVisits int numVisits, err = repository.AppointProjectView(repo.OwnerName, repo.Name, beginTime, endTime) if err != nil { - log.Error("AppointProjectView failed(%s): %v", getDistinctProjectName(repo), err) + log.Error("AppointProjectView failed(%s): %v", projectName, err) } repoStat := models.RepoStatistic{ @@ -162,9 +164,10 @@ func RepoStatisticDaily(date string) { } if _, err = models.InsertRepoStat(&repoStat); err != nil { - log.Error("InsertRepoStat failed(%s): %v", getDistinctProjectName(repo), err) - log.Error("failed statistic: %s", getDistinctProjectName(repo)) - mailer.SendWarnNotifyMail(setting.Warn_Notify_Mails, warnEmailMessage) + log.Error("InsertRepoStat failed(%s): %v", projectName, err) + log.Error("failed statistic: %s", projectName) + error_projects = append(error_projects, projectName) + continue } @@ -247,6 +250,10 @@ func RepoStatisticDaily(date string) { log.Info("finish statistic: %s", getDistinctProjectName(repo)) } + if len(error_projects) > 0 { + mailer.SendWarnNotifyMail(setting.Warn_Notify_Mails, warnEmailMessage) + } + //radar map log.Info("begin statistic radar") for _, radarInit := range reposRadar { diff --git a/routers/repo/setting.go b/routers/repo/setting.go index f7da8f4a8..7bb1a477b 100644 --- a/routers/repo/setting.go +++ b/routers/repo/setting.go @@ -50,6 +50,8 @@ func Settings(ctx *context.Context) { ctx.Data["Title"] = ctx.Tr("repo.settings") ctx.Data["PageIsSettingsOptions"] = true ctx.Data["ForcePrivate"] = setting.Repository.ForcePrivate + SetModelCount(ctx) + SetJobCount(ctx) ctx.HTML(200, tplSettingsOptions) } @@ -57,7 +59,8 @@ func Settings(ctx *context.Context) { func SettingsPost(ctx *context.Context, form auth.RepoSettingForm) { ctx.Data["Title"] = ctx.Tr("repo.settings") ctx.Data["PageIsSettingsOptions"] = true - + SetModelCount(ctx) + SetJobCount(ctx) repo := ctx.Repo.Repository switch ctx.Query("action") { diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 439c17a92..62ed21eb4 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1033,6 +1033,17 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) }) + + m.Group("/inference-job", func() { + m.Get("", reqRepoCloudBrainReader, repo.InferenceJobIndex) + m.Group("/:jobid", func() { + m.Get("", reqRepoCloudBrainReader, repo.InferenceJobShow) + m.Get("/result_download", cloudbrain.AdminOrJobCreaterRight, repo.ResultDownload) + m.Get("/downloadall", repo.DownloadMultiResultFile) + }) + m.Get("/create", reqRepoCloudBrainWriter, repo.InferenceJobNew) + m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) + }) }, context.RepoRef()) m.Group("/blockchain", func() { diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go index eeb496108..c59936581 100644 --- a/services/socketwrap/clientManager.go +++ b/services/socketwrap/clientManager.go @@ -10,6 +10,8 @@ import ( "github.com/elliotchance/orderedmap" ) +var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23} + type ClientsManager struct { Clients *orderedmap.OrderedMap Register chan *Client @@ -47,13 +49,15 @@ func (h *ClientsManager) Run() { close(client.Send) } case message := <-models.ActionChan: - LastActionsQueue.Push(message) - for _, client := range h.Clients.Keys() { - select { - case client.(*Client).Send <- message: - default: - close(client.(*Client).Send) - h.Clients.Delete(client) + if isInOpTypes(opTypes, message.OpType) { + LastActionsQueue.Push(message) + for _, client := range h.Clients.Keys() { + select { + case client.(*Client).Send <- message: + default: + close(client.(*Client).Send) + h.Clients.Delete(client) + } } } case s := <-sig: @@ -71,8 +75,19 @@ func (h *ClientsManager) Run() { } } +func isInOpTypes(types []int, opType models.ActionType) bool { + isFound := false + for _, value := range types { + if value == int(opType) { + isFound = true + break + } + } + return isFound +} + func initActionQueue() { - actions, err := models.GetLast20PublicFeeds() + actions, err := models.GetLast20PublicFeeds(opTypes) if err == nil { for i := len(actions) - 1; i >= 0; i-- { diff --git a/templates/base/head_navbar.tmpl b/templates/base/head_navbar.tmpl index d1d40d1d6..d8d9d50f2 100755 --- a/templates/base/head_navbar.tmpl +++ b/templates/base/head_navbar.tmpl @@ -43,7 +43,7 @@ {{if .IsOperator}} {{.i18n.Tr "explore.data_analysis"}} {{end}} - {{.i18n.Tr "custom.head.openi"}} + {{.i18n.Tr "custom.head.openi.repo"}} {{else if .IsLandingPageHome}} diff --git a/templates/home.tmpl b/templates/home.tmpl index 3a1cca49e..2737afd54 100755 --- a/templates/home.tmpl +++ b/templates/home.tmpl @@ -57,7 +57,7 @@

{{.page_recommend_repo}}

-

{{.page_recommend_repo_desc}}{{.page_recommend_repo_commit}}{{.page_recommend_repo_go}}{{.page_recommend_repo_more}}

+

{{.page_recommend_repo_desc}}{{.page_recommend_repo_commit}}{{.page_recommend_repo_go}} {{.page_recommend_repo_more}}

diff --git a/templates/repo/cloudbrain/new.tmpl b/templates/repo/cloudbrain/new.tmpl index eb7805a23..e39e7cb22 100755 --- a/templates/repo/cloudbrain/new.tmpl +++ b/templates/repo/cloudbrain/new.tmpl @@ -147,7 +147,7 @@
- +
@@ -192,7 +192,7 @@
- + {{range .images}} @@ -225,27 +225,27 @@
- +
- +
- +
- +
- +
- +
- +
diff --git a/templates/repo/datasets/index.tmpl b/templates/repo/datasets/index.tmpl index fb7396224..167b1ef44 100755 --- a/templates/repo/datasets/index.tmpl +++ b/templates/repo/datasets/index.tmpl @@ -57,7 +57,7 @@
- +
diff --git a/templates/repo/debugjob/index.tmpl b/templates/repo/debugjob/index.tmpl index dcededd33..304639fd2 100755 --- a/templates/repo/debugjob/index.tmpl +++ b/templates/repo/debugjob/index.tmpl @@ -217,6 +217,7 @@
@@ -430,12 +431,12 @@
- +
- +
@@ -488,6 +489,7 @@ diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl new file mode 100644 index 000000000..504a85abf --- /dev/null +++ b/templates/repo/modelarts/inferencejob/new.tmpl @@ -0,0 +1,477 @@ +{{template "base/head" .}} + +
+
+
+
+
+
+
+
+
+
+ {{template "repo/header" .}} +
+ {{template "base/alert" .}} +

+ {{.i18n.Tr "repo.modelarts.train_job.new_infer"}} +

+
+ +
+ {{.CsrfTokenHtml}} + + + + {{if $.model_version}} + + {{else}} + + {{end}} + {{if $.label_names}} + + {{else}} + + {{end}} +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+
+ + + 请输入字母、数字、_和-,最长64个字符,且不能以中划线(-)结尾。 +
+ +
+    + +
+
+ + +

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

+
+
+    + +
+
+ +
+
+ + +
+ + + +
+ +
+
+       + +
+
+ +
+
+ +
+   + +
+ +
+      + + + + +
+ +
+   + {{if .bootFile}} + + {{else}} + + {{end}} + + + + 查看样例 +
+ + +
+    + {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} + +
+ {{if ne 0 (len .params)}} + {{range $k ,$v := .params}} +
+
+ +
+
+ +
+ + + + +
+ {{end}} + {{end}} +
+
+ + + +
+         + +
+ +
+ + +
+ +
+ 推理输出路径存储在环境变量result_url中。 +
+ +
+ + {{.i18n.Tr "repo.cloudbrain.cancel"}} +
+ +
+
+
+
+{{template "base/footer" .}} + + \ No newline at end of file diff --git a/templates/repo/modelarts/inferencejob/show.tmpl b/templates/repo/modelarts/inferencejob/show.tmpl new file mode 100644 index 000000000..691087f66 --- /dev/null +++ b/templates/repo/modelarts/inferencejob/show.tmpl @@ -0,0 +1,653 @@ +{{template "base/head" .}} + +
+{{template "repo/header" .}} +
+

+ +

+ {{with .task}} +
+ + +
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ {{$.i18n.Tr "repo.cloudbrain_task"}} + +
+ {{.JobName}} +
+
+ {{$.i18n.Tr "repo.modelarts.status"}} + +
+ {{.Status}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.start_time"}} + +
+ {{TimeSinceUnix1 .CreatedUnix}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}} + +
+ {{.TrainJobDuration}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.AI_driver"}} + +
+ {{.EngineName}} +
+
+ {{$.i18n.Tr "repo.model.manage.description"}} + +
+ {{if .Description}} + {{.Description}} + {{else}} + -- + {{end}} +
+
+ 创建人 + +
+ {{$.userName}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.compute_node"}} + +
+ {{.WorkServerNumber}} +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ {{$.i18n.Tr "repo.modelarts.infer_job_model"}} + +
+ {{.ModelName}}   + {{$.i18n.Tr "repo.modelarts.version"}}:{{.ModelVersion}}   + +
+
+ {{$.i18n.Tr "repo.modelarts.infer_job_model_file"}} + +
+ {{.CkptName}} +
+
+ {{$.i18n.Tr "repo.modelarts.model_label"}} + +
+ + {{if .LabelName}} + {{range $.labelName}} + {{.}} + {{end}} + {{else}} + -- + {{end}} +
+
+ {{$.i18n.Tr "repo.modelarts.code_version"}} + +
+ {{.BranchName}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.start_file"}} + +
+ {{.BootFile}} +
+
+ {{$.i18n.Tr "repo.modelarts.infer_dataset"}} + +
+ {{.DatasetName}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}} + +
+ {{if .Parameters}} + {{.Parameters}} + {{else}} + -- + {{end}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.standard"}} + +
+ {{.FlavorName}} +
+
+
+
+
+ +
+
+ +
+
+ +
+ + +

+                        
+ +
+ +
+
+ + + +
+ +
+
+ +
+ {{end}} + + + + + +
+ +
+ +
+
+{{template "base/footer" .}} + \ No newline at end of file diff --git a/templates/repo/modelarts/notebook/new.tmpl b/templates/repo/modelarts/notebook/new.tmpl index 64851f7db..4e32b5ef3 100755 --- a/templates/repo/modelarts/notebook/new.tmpl +++ b/templates/repo/modelarts/notebook/new.tmpl @@ -48,7 +48,7 @@
- +
@@ -64,11 +64,11 @@
- +
- +
@@ -81,11 +81,11 @@
- +
- +
diff --git a/templates/repo/modelarts/trainjob/edit_para.tmpl b/templates/repo/modelarts/trainjob/edit_para.tmpl index eb408378b..c05c25361 100755 --- a/templates/repo/modelarts/trainjob/edit_para.tmpl +++ b/templates/repo/modelarts/trainjob/edit_para.tmpl @@ -18,11 +18,11 @@

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}

- +
- +

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}

@@ -52,7 +52,7 @@
- + @@ -128,7 +128,7 @@
- +
@@ -141,11 +142,11 @@
{{$.CsrfTokenHtml}} {{if .CanDel}} - + {{$.i18n.Tr "repo.stop"}} {{else}} - + {{$.i18n.Tr "repo.stop"}} {{end}} @@ -262,7 +263,8 @@ const repoPath = job.dataset.repopath const versionname = job.dataset.version const status_text = $(`#${jobID}-text`).text() - if(['IMAGE_FAILED','SUBMIT_FAILED','DELETE_FAILED','KILLED','COMPLETED','FAILED','CANCELED','LOST','START_FAILED'].includes(status_text)){ + const finalState = ['IMAGE_FAILED','SUBMIT_FAILED','DELETE_FAILED','KILLED','COMPLETED','FAILED','CANCELED','LOST','START_FAILED','SUBMIT_MODEL_FAILED','DEPLOY_SERVICE_FAILED','CHECK_FAILED'] + if(finalState.includes(status_text)){ return } $.get(`/api/v1/repos/${repoPath}/modelarts/train-job/${jobID}?version_name=${versionname}`, (data) => { @@ -273,6 +275,7 @@ if (status != job.textContent.trim()) { $('#' + jobID+'-icon').removeClass().addClass(status) $('#' + jobID+ '-text').text(status) + finalState.includes(status) && $('#' + jobID + '-stop').removeClass('blue').addClass('disabled') } diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index 790b6d1eb..02fe8f4fc 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -80,12 +80,13 @@

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

- + + 请输入字母、数字、_和-,最长64个字符,且不能以中划线(-)结尾。
- +
@@ -110,7 +111,6 @@ {{end}} {{end}} {{end}} -
@@ -140,9 +140,9 @@
{{if .bootFile}} - + {{else}} - + {{end}} @@ -226,7 +226,7 @@
- +
@@ -312,7 +312,6 @@ }) }); - console.log(parameters) $('.ui.parameter.modal') .modal('hide'); for(var i = 2; i < parameters.length; i++){ @@ -379,65 +378,16 @@ $('select.dropdown') .dropdown(); - $('.ui.form') - .form({ - on: 'blur', - inline:true, - fields: { - boot_file: { - identifier : 'boot_file', - rules: [ - { - type: 'regExp[/.+\.py$/g]', - prompt : '启动文件必须为.py结尾' - } - ] - }, - job_name:{ - identifier : 'job_name', - rules: [ - { - type: 'regExp[/^[a-zA-Z0-9-_]{1,36}$/]', - prompt : '只包含大小写字母、数字、_和-,最长36个字符。' - } - ] - }, - attachment:{ - identifier : 'attachment', - rules: [ - { - type: 'empty', - prompt : '选择一个数据集' - } - ] - - }, - work_server_number: { - identifier : 'work_server_number', - rules: [ - { - type : 'integer[1..25]', - prompt : '计算节点需要在1-25之间,请您键入正确的值' - } - ] - } - }, - }) - - - function validate(){ $('.ui.form') .form({ on: 'blur', - inline:true, fields: { boot_file: { identifier : 'boot_file', rules: [ { type: 'regExp[/.+\.py$/g]', - prompt : '启动文件必须为.py结尾' } ] }, @@ -445,8 +395,7 @@ identifier : 'job_name', rules: [ { - type: 'regExp[/^[a-zA-Z0-9-_]{1,36}$/]', - prompt : '只包含大小写字母、数字、_和-,最长36个字符。' + type: 'regExp[/^[a-zA-Z0-9-_]{1,64}[^-]$/]', } ] }, @@ -455,7 +404,6 @@ rules: [ { type: 'empty', - prompt : '选择一个数据集' } ] @@ -465,7 +413,6 @@ rules: [ { type : 'integer[1..25]', - prompt : '计算节点需要在1-25之间,请您键入正确的值' } ] } diff --git a/templates/repo/modelarts/trainjob/show.tmpl b/templates/repo/modelarts/trainjob/show.tmpl index 1b8c13ef8..81d36c1e9 100755 --- a/templates/repo/modelarts/trainjob/show.tmpl +++ b/templates/repo/modelarts/trainjob/show.tmpl @@ -175,7 +175,7 @@ td, th {