diff --git a/models/action.go b/models/action.go index 869acb762..6d93fbcd3 100755 --- a/models/action.go +++ b/models/action.go @@ -67,6 +67,7 @@ const ( ActionChangeUserAvatar //38 ActionCreateGrampusNPUDebugTask //39 ActionCreateGrampusGPUDebugTask //40 + ActionCreateGrampusGCUDebugTask //41 ) // Action represents user operation type and other information to @@ -380,7 +381,8 @@ func (a *Action) IsCloudbrainAction() bool { ActionCreateGrampusGPUDebugTask, ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, - ActionCreateGrampusGPUTrainTask: + ActionCreateGrampusGPUTrainTask, + ActionCreateGrampusGCUDebugTask: return true } return false diff --git a/models/ai_model_manage.go b/models/ai_model_manage.go index d55370ea1..24ee0c78f 100644 --- a/models/ai_model_manage.go +++ b/models/ai_model_manage.go @@ -237,13 +237,10 @@ func QueryModelConvertByName(name string, repoId int64) ([]*AiModelConvert, erro func QueryModelConvertById(id string) (*AiModelConvert, error) { sess := x.NewSession() defer sess.Close() - sess.Select("*").Table(new(AiModelConvert)).Where("id='" + id + "'") - aiModelManageConvertList := make([]*AiModelConvert, 0) - err := sess.Find(&aiModelManageConvertList) - if err == nil { - if len(aiModelManageConvertList) == 1 { - return aiModelManageConvertList[0], nil - } + re := new(AiModelConvert) + isExist, err := sess.Table(new(AiModelConvert)).ID(id).Get(re) + if err == nil && isExist { + return re, nil } return nil, err } @@ -251,16 +248,10 @@ func QueryModelConvertById(id string) (*AiModelConvert, error) { func QueryModelById(id string) (*AiModelManage, error) { sess := x.NewSession() defer sess.Close() - sess.Select("*").Table("ai_model_manage"). 
- Where("id='" + id + "'") - aiModelManageList := make([]*AiModelManage, 0) - err := sess.Find(&aiModelManageList) - if err == nil { - if len(aiModelManageList) == 1 { - return aiModelManageList[0], nil - } - } else { - log.Info("error=" + err.Error()) + re := new(AiModelManage) + isExist, err := sess.Table(new(AiModelManage)).ID(id).Get(re) + if err == nil && isExist { + return re, nil } return nil, err } diff --git a/models/cloudbrain.go b/models/cloudbrain.go index a0007c47e..51e122bb6 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -36,6 +36,7 @@ const ( const ( NPUResource = "NPU" GPUResource = "CPU/GPU" + GCUResource = "GCU" AllResource = "all" //notebook storage category @@ -136,6 +137,11 @@ const ( //ComputeResource GPU = "GPU" NPU = "NPU" + GCU = "GCU" +) + +const ( + AIModelPath = "aimodels/" ) type Cloudbrain struct { @@ -304,6 +310,9 @@ func (task *Cloudbrain) IsUserHasRight(user *User) bool { func (task *Cloudbrain) IsGPUTask() bool { return task.ComputeResource == GPUResource } +func (task *Cloudbrain) IsGCUTask() bool { + return task.ComputeResource == GCUResource +} func (task *Cloudbrain) IsNPUTask() bool { return task.ComputeResource == NPUResource } @@ -1461,7 +1470,7 @@ type GetNotebookListResult struct { NotebookList []NotebookList `json:"data"` } -//Grampus +// Grampus type GrampusResult struct { ErrorCode int `json:"errorCode"` ErrorMsg string `json:"errorMsg"` @@ -1850,7 +1859,7 @@ func QueryModelTrainJobList(repoId int64) ([]*Cloudbrain, int, error) { // ) cloudbrains := make([]*Cloudbrain, 0) - if err := sess.Select("job_id,display_job_name").Table(&Cloudbrain{}).Where(cond).OrderBy("created_unix DESC"). + if err := sess.Select("*").Table(&Cloudbrain{}).Where(cond).OrderBy("created_unix DESC"). 
Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } @@ -2191,8 +2200,10 @@ func GetGPUStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) Find(&cloudbrains) } -/** - 本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间 +/* +* + + 本方法考虑了再次调试的情况,多次调试取最后一次的任务的结束时间 */ func GetGPUStoppedDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) @@ -2304,11 +2315,10 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy } return sess.Count(new(Cloudbrain)) } -func GetNotFinalStatusTaskCount(userID int64, notFinalStatus []string, jobTypes []JobType, cloudbrainTypes []int, computeResource string) (int, error) { +func GetNotFinalStatusTaskCount(userID int64, notFinalStatus []string, jobTypes []JobType) (int, error) { count, err := x.In("status", notFinalStatus). In("job_type", jobTypes). - In("type", cloudbrainTypes). - And("user_id = ? and compute_resource = ?", userID, computeResource).Count(new(Cloudbrain)) + And("user_id = ? 
", userID).Count(new(Cloudbrain)) return int(count), err } @@ -2654,7 +2664,8 @@ type DatasetInfo struct { DataLocalPath string Name string FullName string - Size int + Type int + Size int64 } func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { @@ -2694,8 +2705,14 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn if len(grampusType) > 0 { if grampusType[0] == GPU { dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID - } else { + } else if grampusType[0] == NPU { dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } else if grampusType[0] == GCU { + if attach.Type == TypeCloudBrainOne { + dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + } else { + dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } } } else { @@ -2710,7 +2727,8 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn DataLocalPath: dataLocalPath, Name: fileName, FullName: attach.Name, - Size: int(attach.Size), + Type: attach.Type, + Size: attach.Size, } if i == 0 { datasetNames = attach.Name diff --git a/models/task_config.go b/models/task_config.go index f86032fc9..14ca6b223 100644 --- a/models/task_config.go +++ b/models/task_config.go @@ -39,6 +39,7 @@ func GetTaskTypeFromAction(a ActionType) TaskType { ActionCreateGrampusGPUDebugTask, ActionCreateGrampusNPUDebugTask, ActionCreateGrampusNPUTrainTask, + ActionCreateGrampusGCUDebugTask, ActionCreateGrampusGPUTrainTask: return TaskCreateCloudbrainTask case ActionCreateRepo: diff --git a/models/user_business_analysis.go b/models/user_business_analysis.go index ee1b17544..b01080cc6 100644 --- a/models/user_business_analysis.go +++ b/models/user_business_analysis.go @@ -330,7 +330,7 @@ func 
QueryUserStaticDataForUserDefine(opts *UserBusinessAnalysisQueryOptions, wi DataDate := currentTimeNow.Format("2006-01-02 15:04") CodeMergeCountMap := queryPullRequest(start_unix, end_unix) - CommitCountMap, _ := queryCommitAction(start_unix, end_unix, 5) + CommitCountMap := queryCommitAction(start_unix, end_unix, 5) IssueCountMap := queryCreateIssue(start_unix, end_unix) CommentCountMap := queryComment(start_unix, end_unix) @@ -586,7 +586,7 @@ func refreshUserStaticTable(wikiCountMap map[string]int, tableName string, pageS startTime := currentTimeNow.AddDate(0, 0, -1) CodeMergeCountMap := queryPullRequest(start_unix, end_unix) - CommitCountMap, _ := queryCommitAction(start_unix, end_unix, 5) + CommitCountMap := queryCommitAction(start_unix, end_unix, 5) IssueCountMap := queryCreateIssue(start_unix, end_unix) CommentCountMap := queryComment(start_unix, end_unix) @@ -762,7 +762,8 @@ func RefreshUserYearTable(pageStartTime time.Time, pageEndTime time.Time) { end_unix := pageEndTime.Unix() CodeMergeCountMap := queryPullRequest(start_unix, end_unix) - CommitCountMap, mostActiveMap := queryCommitAction(start_unix, end_unix, 5) + CommitCountMap := queryCommitAction(start_unix, end_unix, 5) + mostActiveMap := queryMostActiveCommitAction(start_unix, end_unix) IssueCountMap := queryCreateIssue(start_unix, end_unix) CommentCountMap := queryComment(start_unix, end_unix) @@ -841,17 +842,21 @@ func RefreshUserYearTable(pageStartTime time.Time, pageEndTime time.Time) { repoInfo := getRepoDetailInfo(DetailInfoMap, dateRecordAll.ID, MostDownloadMap) dataSetInfo, datasetscore := getDataSetInfo(dateRecordAll.ID, CreatedDataset, dataSetDownloadMap, CommitDatasetNumMap, CollectedDataset) scoreMap["datasetscore"] = datasetscore - codeInfo, codescore := getCodeInfo(dateRecordAll) + codeInfo, codescore := getCodeInfo(&dateRecordAll) scoreMap["codescore"] = codescore - cloudBrainInfo := getCloudBrainInfo(dateRecordAll, CloudBrainTaskItemMap, scoreMap) + cloudBrainInfo := 
getCloudBrainInfo(&dateRecordAll, CloudBrainTaskItemMap, scoreMap) playARoll := getPlayARoll(bonusMap, dateRecordAll.Name, scoreMap) + exteral := 0 + if int(subTime.Hours())%24 > 0 { + exteral = 1 + } re := &UserSummaryCurrentYear{ ID: dateRecordAll.ID, Name: dateRecordAll.Name, Email: dateRecordAll.Email, Phone: dateRecordAll.Phone, RegistDate: dateRecordAll.RegistDate, - DateCount: int(subTime.Hours()) / 24, + DateCount: int(subTime.Hours())/24 + exteral, MostActiveDay: mostActiveDay, RepoInfo: repoInfo, DataSetInfo: dataSetInfo, @@ -869,17 +874,6 @@ func RefreshUserYearTable(pageStartTime time.Time, pageEndTime time.Time) { log.Info("update user year data finished. ") } -func isUserYearData(tableName string) bool { - if tableName == "user_business_analysis_current_year" { - currentTimeNow := time.Now() - if currentTimeNow.Year() >= 2023 { - return false - } - return true - } - return false -} - func getBonusWeekDataMap() map[int64][]int { bonusMap := make(map[int64][]int) url := setting.RecommentRepoAddr + "bonus/weekdata/record.txt" @@ -1027,7 +1021,7 @@ func getPlayARoll(bonusMap map[string]map[string]int, userName string, scoreMap } } -func getCloudBrainInfo(dateRecordAll UserBusinessAnalysisAll, CloudBrainTaskItemMap map[string]int, scoreMap map[string]float64) string { +func getCloudBrainInfo(dateRecordAll *UserBusinessAnalysisAll, CloudBrainTaskItemMap map[string]int, scoreMap map[string]float64) string { trainscore := 0.0 debugscore := 0.0 runtime := 0.0 @@ -1043,6 +1037,7 @@ func getCloudBrainInfo(dateRecordAll UserBusinessAnalysisAll, CloudBrainTaskItem trainscore = float64(dateRecordAll.GpuTrainJob+dateRecordAll.NpuTrainJob) / float64(50) } cloudBrainInfo["inference_task_num"] = fmt.Sprint(dateRecordAll.NpuInferenceJob + CloudBrainTaskItemMap[fmt.Sprint(dateRecordAll.ID)+"_GpuInferenceJob"]) + cloudBrainInfo["benchmark_task_num"] = fmt.Sprint(dateRecordAll.GpuBenchMarkJob) cloudBrainInfo["card_runtime"] = fmt.Sprint(dateRecordAll.CloudBrainRunTime) 
cloudBrainInfo["card_runtime_money"] = fmt.Sprint(dateRecordAll.CloudBrainRunTime * 5) cloudBrainInfo["CloudBrainOne"] = fmt.Sprint(CloudBrainTaskItemMap[fmt.Sprint(dateRecordAll.ID)+"_CloudBrainOne"]) @@ -1061,7 +1056,7 @@ func getCloudBrainInfo(dateRecordAll UserBusinessAnalysisAll, CloudBrainTaskItem } } -func getCodeInfo(dateRecordAll UserBusinessAnalysisAll) (string, float64) { +func getCodeInfo(dateRecordAll *UserBusinessAnalysisAll) (string, float64) { if dateRecordAll.CommitCount > 0 { codeInfo := make(map[string]string) codeInfo["commit_count"] = fmt.Sprint(dateRecordAll.CommitCount) @@ -1245,7 +1240,7 @@ func CounDataByDateAndReCount(wikiCountMap map[string]int, startTime time.Time, DataDate := CountDate.Format("2006-01-02") CodeMergeCountMap := queryPullRequest(start_unix, end_unix) - CommitCountMap, _ := queryCommitAction(start_unix, end_unix, 5) + CommitCountMap := queryCommitAction(start_unix, end_unix, 5) IssueCountMap := queryCreateIssue(start_unix, end_unix) CommentCountMap := queryComment(start_unix, end_unix) @@ -1391,7 +1386,7 @@ func CounDataByDateAndReCount(wikiCountMap map[string]int, startTime time.Time, useMetrics.TotalActivateRegistUser = getMapKeyStringValue("TotalActivateRegistUser", userMetrics) useMetrics.TotalHasActivityUser = getMapKeyStringValue("TotalHasActivityUser", userMetrics) useMetrics.CurrentDayRegistUser = getMapKeyStringValue("CurrentDayRegistUser", userMetrics) - count, err = sess.Where("type=0").Count(new(User)) + count, err = sess.Where("type=0 and created_unix<=" + fmt.Sprint(end_unix)).Count(new(User)) if err != nil { log.Info("query user error. 
return.") } @@ -1739,52 +1734,48 @@ func queryPullRequest(start_unix int64, end_unix int64) map[int64]int { return resultMap } -func queryCommitAction(start_unix int64, end_unix int64, actionType int64) (map[int64]int, map[int64]map[string]int) { +func queryMostActiveCommitAction(start_unix int64, end_unix int64) map[int64]map[string]int { sess := x.NewSession() defer sess.Close() - resultMap := make(map[int64]int) + mostActiveMap := make(map[int64]map[string]int) cond := "user_id=act_user_id and created_unix>=" + fmt.Sprint(start_unix) + " and created_unix<=" + fmt.Sprint(end_unix) count, err := sess.Where(cond).Count(new(Action)) if err != nil { log.Info("query action error. return.") - return resultMap, mostActiveMap + return mostActiveMap } var indexTotal int64 indexTotal = 0 for { - sess.Select("id,user_id,op_type,act_user_id,created_unix").Table("action").Where(cond).OrderBy("id asc").Limit(PAGE_SIZE, int(indexTotal)) - actionList := make([]*Action, 0) - sess.Find(&actionList) - - log.Info("query action size=" + fmt.Sprint(len(actionList))) + actionList, err := sess.QueryInterface("select id,user_id,op_type,act_user_id,created_unix from public.action where " + cond + " order by id asc limit " + fmt.Sprint(PAGE_SIZE) + " offset " + fmt.Sprint(indexTotal)) + if err != nil { + log.Info("error:" + err.Error()) + continue + } + log.Info("query mostactive action size=" + fmt.Sprint(len(actionList))) for _, actionRecord := range actionList { - if int64(actionRecord.OpType) == actionType { - if _, ok := resultMap[actionRecord.UserID]; !ok { - resultMap[actionRecord.UserID] = 1 - } else { - resultMap[actionRecord.UserID] += 1 - } - } - key := getDate(actionRecord.CreatedUnix) - if _, ok := mostActiveMap[actionRecord.UserID]; !ok { + userId := convertInterfaceToInt64(actionRecord["user_id"]) + created_unix := timeutil.TimeStamp(convertInterfaceToInt64(actionRecord["created_unix"])) + key := getDate(created_unix) + if _, ok := mostActiveMap[userId]; !ok { tmpMap := 
make(map[string]int) tmpMap[key] = 1 - mostActiveMap[actionRecord.UserID] = tmpMap + mostActiveMap[userId] = tmpMap } else { - mostActiveMap[actionRecord.UserID][key] = getMapKeyStringValue(key, mostActiveMap[actionRecord.UserID]) + 1 + mostActiveMap[userId][key] = getMapKeyStringValue(key, mostActiveMap[userId]) + 1 } - utcTime := actionRecord.CreatedUnix.AsTime() + utcTime := created_unix.AsTime() hour := utcTime.Hour() if hour >= 0 && hour <= 5 { key = "hour_hour" - if getMapKeyStringValue(key, mostActiveMap[actionRecord.UserID]) < hour { - mostActiveMap[actionRecord.UserID][key] = hour - mostActiveMap[actionRecord.UserID]["hour_day"] = utcTime.Day() - mostActiveMap[actionRecord.UserID]["hour_month"] = int(utcTime.Month()) - mostActiveMap[actionRecord.UserID]["hour_year"] = utcTime.Year() + if getMapKeyStringValue(key, mostActiveMap[userId]) < hour { + mostActiveMap[userId][key] = hour + mostActiveMap[userId]["hour_day"] = utcTime.Day() + mostActiveMap[userId]["hour_month"] = int(utcTime.Month()) + mostActiveMap[userId]["hour_year"] = utcTime.Year() } } } @@ -1793,9 +1784,60 @@ func queryCommitAction(start_unix int64, end_unix int64, actionType int64) (map[ break } } + return mostActiveMap +} - return resultMap, mostActiveMap +func queryCommitAction(start_unix int64, end_unix int64, actionType int64) map[int64]int { + sess := x.NewSession() + defer sess.Close() + resultMap := make(map[int64]int) + cond := "op_type=" + fmt.Sprint(actionType) + " and user_id=act_user_id and created_unix>=" + fmt.Sprint(start_unix) + " and created_unix<=" + fmt.Sprint(end_unix) + count, err := sess.Where(cond).Count(new(Action)) + if err != nil { + log.Info("query action error. 
return.") + return resultMap + } + var indexTotal int64 + indexTotal = 0 + for { + actionList, err := sess.QueryInterface("select id,user_id,op_type,act_user_id,created_unix from public.action where " + cond + " order by id asc limit " + fmt.Sprint(PAGE_SIZE) + " offset " + fmt.Sprint(indexTotal)) + if err != nil { + log.Info("error:" + err.Error()) + continue + } + log.Info("query action size=" + fmt.Sprint(len(actionList))) + for _, actionRecord := range actionList { + userId := convertInterfaceToInt64(actionRecord["user_id"]) + + if _, ok := resultMap[userId]; !ok { + resultMap[userId] = 1 + } else { + resultMap[userId] += 1 + } + + } + indexTotal += PAGE_SIZE + if indexTotal >= count { + break + } + } + return resultMap } + +func convertInterfaceToInt64(obj interface{}) int64 { + switch obj.(type) { + case int8: + return int64(obj.(int8)) + case int16: + return int64(obj.(int16)) + case int32: + return int64(obj.(int32)) + case int64: + return obj.(int64) + } + return 0 +} + func getDate(createTime timeutil.TimeStamp) string { return createTime.Format("2006-01-02") } @@ -2258,7 +2300,7 @@ func queryUserCreateRepo(start_unix int64, end_unix int64) (map[int64]int, map[s var indexTotal int64 indexTotal = 0 for { - sess.Select("id,owner_id,name,is_private,clone_cnt").Table("repository").Where(cond).OrderBy("id asc").Limit(PAGE_SIZE, int(indexTotal)) + sess.Select("id,owner_id,name,is_private,clone_cnt,alias").Table("repository").Where(cond).OrderBy("id asc").Limit(PAGE_SIZE, int(indexTotal)) repoList := make([]*Repository, 0) sess.Find(&repoList) log.Info("query Repository size=" + fmt.Sprint(len(repoList))) @@ -2481,6 +2523,13 @@ func queryUserModelConvert(start_unix int64, end_unix int64) map[int64]int { return resultMap } +func isBenchMark(JobType string) bool { + if JobType == "BENCHMARK" || JobType == "MODELSAFETY" || JobType == "SNN4IMAGENET" || JobType == "BRAINSCORE" || JobType == "SNN4ECOSET" { + return true + } + return false +} + func 
queryCloudBrainTask(start_unix int64, end_unix int64) (map[int64]int, map[string]int) { sess := x.NewSession() defer sess.Close() @@ -2488,11 +2537,12 @@ func queryCloudBrainTask(start_unix int64, end_unix int64) (map[int64]int, map[s resultItemMap := make(map[string]int) cond := " created_unix>=" + fmt.Sprint(start_unix) + " and created_unix<=" + fmt.Sprint(end_unix) - count, err := sess.Where(cond).Count(new(Cloudbrain)) + count, err := sess.Where(cond).Unscoped().Count(new(Cloudbrain)) if err != nil { log.Info("query cloudbrain error. return.") return resultMap, resultItemMap } + log.Info("cloudbrain count=" + fmt.Sprint(count)) var indexTotal int64 indexTotal = 0 for { @@ -2515,6 +2565,8 @@ func queryCloudBrainTask(start_unix int64, end_unix int64) (map[int64]int, map[s setMapKey("NpuTrainJob", cloudTaskRecord.UserID, 1, resultItemMap) } else if cloudTaskRecord.JobType == "INFERENCE" { setMapKey("NpuInferenceJob", cloudTaskRecord.UserID, 1, resultItemMap) + } else if isBenchMark(cloudTaskRecord.JobType) { + setMapKey("GpuBenchMarkJob", cloudTaskRecord.UserID, 1, resultItemMap) } else { setMapKey("NpuDebugJob", cloudTaskRecord.UserID, 1, resultItemMap) } @@ -2524,7 +2576,7 @@ func queryCloudBrainTask(start_unix int64, end_unix int64) (map[int64]int, map[s setMapKey("GpuTrainJob", cloudTaskRecord.UserID, 1, resultItemMap) } else if cloudTaskRecord.JobType == "INFERENCE" { setMapKey("GpuInferenceJob", cloudTaskRecord.UserID, 1, resultItemMap) - } else if cloudTaskRecord.JobType == "BENCHMARK" { + } else if isBenchMark(cloudTaskRecord.JobType) { setMapKey("GpuBenchMarkJob", cloudTaskRecord.UserID, 1, resultItemMap) } else { setMapKey("GpuDebugJob", cloudTaskRecord.UserID, 1, resultItemMap) @@ -2551,7 +2603,6 @@ func queryCloudBrainTask(start_unix int64, end_unix int64) (map[int64]int, map[s break } } - return resultMap, resultItemMap } diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 37e6fc1bf..b65b42bdd 100755 --- 
a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -19,6 +19,7 @@ const ( ProcessorTypeNPU = "npu.huawei.com/NPU" ProcessorTypeGPU = "nvidia.com/gpu" + ProcessorTypeGCU = "enflame-tech.com/gcu" GpuWorkDir = "/tmp/" NpuWorkDir = "/cache/" @@ -108,6 +109,7 @@ type GenerateNotebookJobReq struct { Spec *models.Specification CodeName string ModelPath string //参考启智GPU调试, 挂载/model目录用户的模型可以输出到这个目录 + ModelStorageType int } func getEndPoint() string { @@ -148,6 +150,36 @@ func getDatasetGPUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models. } return datasetGrampus, command } +func getDatasetGCUGrampus(datasetInfos map[string]models.DatasetInfo) ([]models.GrampusDataset, string) { + var datasetGrampus []models.GrampusDataset + var command = "" + obsEndPoint := getEndPoint() + for uuid, datasetInfo := range datasetInfos { + if datasetInfo.Type == models.TypeCloudBrainOne { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: datasetInfo.DataLocalPath, + ReadOnly: true, + ContainerPath: "/dataset1/" + datasetInfo.Name, + }) + + command += "cp /dataset1/'" + datasetInfo.Name + "'/" + uuid + " /dataset/'" + datasetInfo.FullName + "';" + + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: datasetInfo.FullName, + Bucket: setting.Bucket, + EndPoint: obsEndPoint, + ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, + ContainerPath: "/dataset/" + datasetInfo.Name, + }) + } + + } + return datasetGrampus, command +} func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) { createTime := timeutil.TimeStampNow() @@ -178,25 +210,45 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job imageUrl = "" req.Command = "" } else { - datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + if 
ProcessorTypeGCU == req.ProcessType { + datasetGrampus, cpCommand = getDatasetGCUGrampus(req.DatasetInfos) + } else { + datasetGrampus, cpCommand = getDatasetGPUGrampus(req.DatasetInfos) + } if len(req.ModelName) != 0 { - datasetGrampus = append(datasetGrampus, models.GrampusDataset{ - Name: req.ModelName, - Bucket: setting.Attachment.Minio.Bucket, - EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.PreTrainModelPath, - ReadOnly: true, - ContainerPath: cloudbrain.PretrainModelMountPath, - }) + if req.ModelStorageType == models.TypeCloudBrainOne { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Attachment.Minio.Bucket, + EndPoint: setting.Attachment.Minio.Endpoint, + ObjectKey: req.PreTrainModelPath, + ReadOnly: true, + ContainerPath: cloudbrain.PretrainModelMountPath, + }) + } else { + datasetGrampus = append(datasetGrampus, models.GrampusDataset{ + Name: req.ModelName, + Bucket: setting.Bucket, + EndPoint: getEndPoint(), + ReadOnly: true, + ObjectKey: req.PreTrainModelPath, + ContainerPath: cloudbrain.PretrainModelMountPath, + }) + } + } + codeArchiveName := cloudbrain.DefaultBranchName + ".zip" codeGrampus = models.GrampusDataset{ Name: req.CodeName, Bucket: setting.Attachment.Minio.Bucket, EndPoint: setting.Attachment.Minio.Endpoint, - ObjectKey: req.CodeStoragePath + cloudbrain.DefaultBranchName + ".zip", + ObjectKey: req.CodeStoragePath + codeArchiveName, ReadOnly: false, ContainerPath: cloudbrain.CodeMountPath, } + if ProcessorTypeGCU == req.ProcessType { + imageUrl = "" + } req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) log.Info("debug command:" + req.Command) @@ -215,6 +267,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, Command: req.Command, + CenterID: 
req.Spec.GetAvailableCenterIds(ctx.User.ID), }, }, }) @@ -263,6 +316,8 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job actionType = models.ActionCreateGrampusNPUDebugTask } else if req.ComputeResource == models.GPUResource { actionType = models.ActionCreateGrampusGPUDebugTask + } else if req.ComputeResource == models.GCUResource { + actionType = models.ActionCreateGrampusGCUDebugTask } task, err := models.GetCloudbrainByJobID(jobID) if err != nil { diff --git a/modules/redis/redis_key/cloudbrain_redis_key.go b/modules/redis/redis_key/cloudbrain_redis_key.go index 4c5d05dfc..a442d89f4 100644 --- a/modules/redis/redis_key/cloudbrain_redis_key.go +++ b/modules/redis/redis_key/cloudbrain_redis_key.go @@ -1,7 +1,13 @@ package redis_key +import "fmt" + const CLOUDBRAIN_PREFIX = "cloudbrain" func CloudbrainBindingJobNameKey(repoId string, jobType string, jobName string) string { return KeyJoin(CLOUDBRAIN_PREFIX, repoId, jobType, jobName, "redis_key") } + +func CloudbrainUniquenessKey(userId int64, jobType string) string { + return KeyJoin(CLOUDBRAIN_PREFIX, fmt.Sprint(userId), jobType, "uniqueness") +} diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 09e7259f2..44dff2779 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -690,6 +690,8 @@ var ( IncubationSourceOrgName string PaperRepoTopicName string + CloudbrainUniquenessLockTime time.Duration + //nginx proxy PROXYURL string RadarMap = struct { @@ -1506,6 +1508,8 @@ func NewContext() { CullInterval = sec.Key("CULL_INTERVAL").MustString("60") DebugAttachSize = sec.Key("DEBUG_ATTACH_SIZE").MustInt(20) + CloudbrainUniquenessLockTime = sec.Key("UNIQUENESS_LOCK_TIME").MustDuration(5 * time.Minute) + sec = Cfg.Section("benchmark") IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) BenchmarkOwner = sec.Key("OWNER").MustString("") diff --git a/modules/storage/minio_ext.go b/modules/storage/minio_ext.go index d4a8abba5..6aeb89aab 100755 --- 
a/modules/storage/minio_ext.go +++ b/modules/storage/minio_ext.go @@ -391,3 +391,19 @@ func GetPartInfos(objectName string, uploadID string) (string, error) { return chunks, nil } + +func IsObjectExist4Minio(bucket, objectName string) (bool, error) { + _, core, err := getClients() + if err != nil { + log.Error("getClients failed:", err.Error()) + return false, err + } + + _, err = core.StatObject(bucket, objectName, miniov6.StatObjectOptions{}) + if err != nil { + log.Error("GetObjectMetadata error.%v", err) + return false, err + } + + return true, nil +} diff --git a/modules/storage/obs.go b/modules/storage/obs.go index cc621cc3c..d00d000b5 100755 --- a/modules/storage/obs.go +++ b/modules/storage/obs.go @@ -645,3 +645,16 @@ func GetObsLogFileName(prefix string) ([]FileInfo, error) { } return fileInfos, nil } + +func IsObjectExist4Obs(bucket, key string) (bool, error) { + + _, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{ + Bucket: bucket, + Key: key, + }) + if err != nil { + log.Error("GetObjectMetadata error.%v", err) + return false, err + } + return true, nil +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index f04b67903..1044876d7 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1099,8 +1099,8 @@ image_delete_fail=Failed to delete image, please try again later. image_overwrite=You had submitted the same name image before, are you sure to overwrite the original image? 
download=Download score=Score -wait_count_start = Your current queue position is -wait_count_end = +wait_count_start = Your current queue position is +wait_count_end = file_limit_100 = Display up to 100 files or folders in a single directory images.name = Image Tag images.name_placerholder = Please enter the image name @@ -1174,7 +1174,7 @@ modelarts.train_job.new_train=New Train Task modelarts.train_job.new_infer=New Inference Task modelarts.train_job.config=Configuration information modelarts.train_job.new=New train Task -modelarts.train_job.new_place=The description should not exceed 256 characters +modelarts.train_job.new_place=The description should not exceed 255 characters modelarts.model_name=Model Name modelarts.model_size=Model Size modelarts.import_model=Import Model @@ -1360,6 +1360,7 @@ modelconvert.inputshapeerror=Format input error, please input such as: 1,1,32,32 modelconvert.manage.create_error1=A model transformation task with the same name already exists. modelconvert.manage.create_error2=Only one running model transformation task can be created. modelconvert.manage.model_not_exist=The model in the task does not exist or has been deleted. +modelconvert.manage.model_file_not_exist=The model file in the task does not exist or has been deleted. modelconvert.manage.no_operate_right=You have no right to do the operation. debug.manage.model_not_exist=The model in the task does not exist or has been deleted, please create a new debug job. @@ -3146,6 +3147,7 @@ task_gpudebugjob=`created CPU/GPU type debugging task %s` task_c2net_gpudebugjob=`created CPU/GPU type debugging task %s` task_c2net_npudebugjob=`created NPU type debugging task %s` +task_c2ent_gcudebugjob=`created GCU type debugging task %s` task_nputrainjob=`created NPU training task %s` task_inferencejob=`created reasoning task %s` task_benchmark=`created profiling task %s` @@ -3320,7 +3322,9 @@ Stopped_failed=Fail to stop the job, please try again later. 
Stopped_success_update_status_fail=Succeed in stopping th job, but failed to update the job status and duration time. load_code_failed=Fail to load code, please check if the right branch is selected. +error.debug_datasetsize = The size of dataset exceeds limitation (%dGB) error.dataset_select = dataset select error:the count exceed the limit or has same name +error.partial_datasets_not_available = There are non-existent or deleted files in the selected dataset file, please select again new_train_gpu_tooltips = The code is storaged in %s, the dataset is storaged in %s, the pre-trained model is storaged in the run parameter %s, and please put your model into %s then you can download it online new_debug_gpu_tooltips = The code is storaged in %s, the dataset is storaged in %s, the pre-trained model is storaged in the %s, and please put your model into %s then you can download it online new_debug_gpu_tooltips1 = The code is storaged in %s, the dataset is storaged in %s, the pre-trained model is storaged in the %s. 
diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index de7fea353..5cef94087 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -1085,7 +1085,7 @@ delete=删除 more=更多 gpu_type_all=全部 model_download=结果下载 -all_result_download=全部结果下载 +all_result_download=全部结果下载 submit_image=提交镜像 modify_image=修改镜像 image_exist=镜像Tag已被使用,请修改镜像Tag。 @@ -1374,6 +1374,7 @@ modelconvert.modelfileempty=请选择模型文件。 modelconvert.manage.create_error1=相同的名称模型转换任务已经存在。 modelconvert.manage.create_error2=只能创建一个正在运行的模型转换任务。 modelconvert.manage.model_not_exist=任务中选择的模型不存在或者已被删除。 +modelconvert.manage.model_file_not_exist=任务中选择的模型文件不存在或者已被删除。 modelconvert.manage.no_operate_right=您没有操作权限。 @@ -3165,6 +3166,7 @@ task_gpudebugjob=`创建了CPU/GPU类型调试任务 task_npudebugjob=`创建了NPU类型调试任务 %s` task_c2net_gpudebugjob=`创建了CPU/GPU类型调试任务 %s` task_c2net_npudebugjob=`创建了NPU类型调试任务 %s` +task_c2ent_gcudebugjob=`创建了GCU类型调试任务 %s` task_nputrainjob=`创建了NPU类型训练任务 %s` task_inferencejob=`创建了推理任务 %s` task_benchmark=`创建了评测任务 %s` @@ -3341,8 +3343,9 @@ Stopped_failed=任务停止失败,请稍后再试。 Stopped_success_update_status_fail=任务停止成功,状态及运行时间更新失败。 load_code_failed=代码加载失败,请确认选择了正确的分支。 -error.debug_datasetsize = 数据集大小超过限制('%d'GB) +error.debug_datasetsize = 数据集大小超过限制(%dGB) error.dataset_select = 数据集选择错误:数量超过限制或者有同名数据集 +error.partial_datasets_not_available = 选择的数据集文件中有不存在或已删除的文件,请重新选择 new_train_gpu_tooltips = 训练脚本存储在 %s 中,数据集存储在 %s 中,预训练模型存放在运行参数 %s 中,训练输出请存储在 %s 中以供后续下载。 new_debug_gpu_tooltips = 项目代码存储在 %s 中,数据集存储在 %s 中,选择的模型存储在 %s 中,调试输出请存储在 %s 中以供后续下载。 new_debug_gpu_tooltips1 = 项目代码存储在 %s 中,数据集存储在 %s 中,选择的模型存储在 %s 中。 diff --git a/public/home/home.js b/public/home/home.js index fe843161e..626a1302f 100755 --- a/public/home/home.js +++ b/public/home/home.js @@ -243,11 +243,12 @@ document.onreadystatechange = function () { html += recordPrefix + actionName; html += " " + getRepotext(record) + "" } - else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == 
"28" || record.OpType == "30" || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){ + else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" + || record.OpType == "31" || record.OpType == "32" || record.OpType == "33"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } - else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40"){ + else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } @@ -294,7 +295,7 @@ function getTaskLink(record){ re = re + "/cloudbrain/train-job/" + record.Content; }else if(record.OpType == 32 || record.OpType == 33){ re = re + "/grampus/train-job/" + record.Content; - }else if(record.OpType == 39 || record.OpType == 40){ + }else if(record.OpType == 39 || record.OpType == 40 || record.OpType == 41){ re = re + "/grampus/notebook/" + record.Content; } @@ -453,9 +454,10 @@ var actionNameZH={ "33":"创建了CPU/GPU类型训练任务", "35":"创建的数据集 {dataset} 被设置为推荐数据集", "36":"提交了镜像 {image}", - "37": "提交的镜像 {image} 被设置为推荐镜像", + "37":"提交的镜像 {image} 被设置为推荐镜像", "39":"创建了CPU/GPU类型调试任务", "40":"创建了NPU类型调试任务", + "41":"创建了GCU类型调试任务", }; var actionNameEN={ @@ -486,9 +488,10 @@ var actionNameEN={ "33":" created CPU/GPU type training task", "35":" created dataset {dataset} was set as recommended dataset", "36":"committed image {image}", - "37": "committed image {image} was set as recommended image", + "37":"committed image {image} was set as recommended image", "39":" created CPU/GPU type debugging task ", "40":" created NPU type debugging task ", + "41":" created GCU type debugging task ", }; var repoAndOrgZH={ diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index ba7346481..3a5ee5f57 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -738,6 
+738,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Combo("/repositories/:id", reqToken()).Get(repo.GetByID) m.Group("/datasets/:username/:reponame", func() { + m.Get("", repo.CurrentRepoDatasetInfoWithoutAttachment) m.Get("/current_repo", repo.CurrentRepoDatasetMultiple) m.Get("/my_datasets", repo.MyDatasetsMultiple) m.Get("/public_datasets", repo.PublicDatasetMultiple) diff --git a/routers/api/v1/repo/datasets.go b/routers/api/v1/repo/datasets.go index 0223cf740..2dbda4fb6 100644 --- a/routers/api/v1/repo/datasets.go +++ b/routers/api/v1/repo/datasets.go @@ -51,6 +51,28 @@ func CurrentRepoDatasetMultiple(ctx *context.APIContext) { } +func CurrentRepoDatasetInfoWithoutAttachment(ctx *context.APIContext) { + dataset, err := models.GetDatasetByRepo(ctx.Repo.Repository) + + if err != nil { + log.Warn("can not get dataset.", err) + ctx.JSON(200, map[string]interface{}{ + "code": 0, + "message": "", + "data": []*api.Dataset{}, + }) + return + } + + dataset.Repo = ctx.Repo.Repository + ctx.JSON(200, map[string]interface{}{ + "code": 0, + "message": "", + "data": []*api.Dataset{convert.ToDataset(dataset)}, + }) + +} + func MyDatasetsMultiple(ctx *context.APIContext) { opts := &models.SearchDatasetOptions{ diff --git a/routers/home.go b/routers/home.go index 40a41bd68..21b1db946 100755 --- a/routers/home.go +++ b/routers/home.go @@ -42,6 +42,7 @@ const ( tplExploreImages base.TplName = "explore/images" tplExploreExploreDataAnalysis base.TplName = "explore/data_analysis" tplHomeTerm base.TplName = "terms" + tplHomeAnnual base.TplName = "annual_privacy" tplHomePrivacy base.TplName = "privacy" tplResoruceDesc base.TplName = "resource_desc" tplRepoSquare base.TplName = "explore/repos/square" @@ -966,6 +967,9 @@ func RecommendHomeInfo(ctx *context.Context) { func HomeTerm(ctx *context.Context) { ctx.HTML(200, tplHomeTerm) } +func HomeAnnual(ctx *context.Context) { + ctx.HTML(200, tplHomeAnnual) +} func HomePrivacy(ctx *context.Context) { ctx.HTML(200, tplHomePrivacy) diff 
--git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index a074119fc..e67568394 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -76,7 +76,7 @@ func saveModelByParameters(jobId string, versionName string, name string, versio cloudType := aiTask.Type modelSelectedFile := ctx.Query("modelSelectedFile") //download model zip //train type - if aiTask.ComputeResource == models.NPUResource { + if aiTask.ComputeResource == models.NPUResource || aiTask.ComputeResource == models.GCUResource { cloudType = models.TypeCloudBrainTwo } else if aiTask.ComputeResource == models.GPUResource { cloudType = models.TypeCloudBrainOne diff --git a/routers/repo/aisafety.go b/routers/repo/aisafety.go index 55f25dba6..27e123fac 100644 --- a/routers/repo/aisafety.go +++ b/routers/repo/aisafety.go @@ -2,6 +2,7 @@ package repo import ( "bufio" + "code.gitea.io/gitea/services/lock" "encoding/json" "errors" "fmt" @@ -575,6 +576,20 @@ func AiSafetyCreateForPost(ctx *context.Context) { tpname = tplCloudBrainModelSafetyNewGpu } + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: string(models.JobTypeInference)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr(ctx.Tr(errMsg), tpname, nil) + return + } + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeModelSafety), displayJobName) if err == nil { if len(tasks) != 0 { diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index cf6df6312..f220802bf 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -2,6 +2,7 @@ package repo import ( "bufio" + "code.gitea.io/gitea/services/lock" "encoding/json" "errors" "fmt" @@ 
-40,8 +41,6 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/modelarts" - "code.gitea.io/gitea/modules/redis/redis_key" - "code.gitea.io/gitea/modules/redis/redis_lock" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/util" @@ -228,16 +227,19 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { if jobType == string(models.JobTypeTrain) { tpl = tplCloudBrainTrainJobNew } + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: form.JobType}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx, jobType) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) + ctx.RenderWithErr(ctx.Tr(errMsg), tpl, &form) return } - defer lock.UnLock() tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) if err == nil { @@ -285,7 +287,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } var datasetInfos map[string]models.DatasetInfo var datasetNames string - var attachSize int + var attachSize int64 if uuids != "" { datasetInfos, datasetNames, err = models.GetDatasetInfo(uuids) if err != nil { @@ -299,10 +301,10 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int(setting.DebugAttachSize*1000*1000*1000) { - log.Error("The 
DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) // GB + if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB cloudBrainNewDataPrepare(ctx, jobType) - ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tpl, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form) return } } @@ -462,15 +464,19 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra repo := ctx.Repo.Repository tpl := tplCloudBrainInferenceJobNew - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx, jobType) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: jobType}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + ctx.RenderWithErr(ctx.Tr(errMsg), tpl, &form) return } - defer lock.UnLock() ckptUrl := setting.Attachment.Minio.RealPath + form.TrainUrl + form.CkptName log.Info("ckpt url:" + ckptUrl) @@ -603,8 +609,10 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra } -/** - 检查用户传输的参数是否符合专属资源池 +/* +* + + 检查用户传输的参数是否符合专属资源池 */ func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string { if cloudbrain.SpecialPools != nil { @@ -645,7 +653,24 @@ func 
CloudBrainRestart(ctx *context.Context) { var errorMsg = "" var status = string(models.JobWaiting) task := ctx.Cloudbrain + + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainRestart(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{JobType: task.JobType}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) + resultCode = "-1" + errorMsg = ctx.Tr(errMsg) + } for { + if errorMsg != "" { + break + } + if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) { log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) resultCode = "-1" @@ -746,7 +771,6 @@ func CloudBrainRestart(ctx *context.Context) { }) } - func hasDatasetDeleted(task *models.Cloudbrain) bool { if task.Uuid == "" { return false @@ -1321,8 +1345,8 @@ func DeleteJobsByRepoID(repoID int64) { DeleteJobs(cloudBrains) } -/** - +/* +* */ func StopJobs(cloudBrains []*models.Cloudbrain) { @@ -1352,14 +1376,11 @@ func StopJobs(cloudBrains []*models.Cloudbrain) { logErrorAndUpdateJobStatus(err, taskInfo) } } else if taskInfo.Type == models.TypeC2Net { - if taskInfo.JobType == string(models.JobTypeTrain) { - err := retry(3, time.Second*30, func() error { - _, err := grampus.StopJob(taskInfo.JobID) - return err - }) - logErrorAndUpdateJobStatus(err, taskInfo) - } - + err := retry(3, time.Second*30, func() error { + _, err := grampus.StopJob(taskInfo.JobID, taskInfo.JobType) + return err + }) + logErrorAndUpdateJobStatus(err, taskInfo) } } } @@ -2389,15 +2410,19 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo ctx.Data["benchmarkTypeID"] = benchmarkTypeID ctx.Data["benchmark_child_types_id_hidden"] = benchmarkChildTypeID - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), form.JobType, 
displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: form.JobType}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx, jobType) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplCloudBrainBenchmarkNew, &form) + ctx.RenderWithErr(ctx.Tr(errMsg), tplCloudBrainBenchmarkNew, &form) return } - defer lock.UnLock() tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) if err == nil { @@ -2578,15 +2603,19 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) tpl := tplCloudBrainBenchmarkNew command := cloudbrain.GetCloudbrainDebugCommand() - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx, jobType) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: jobType}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) + grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + ctx.RenderWithErr(ctx.Tr(errMsg), tpl, &form) return } - defer lock.UnLock() tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, 
jobType, displayJobName) if err == nil { diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index d33fce106..a4cc2dd14 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1,6 +1,7 @@ package repo import ( + "code.gitea.io/gitea/services/lock" "encoding/json" "errors" "fmt" @@ -27,8 +28,6 @@ import ( "code.gitea.io/gitea/modules/grampus" "code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/notification" - "code.gitea.io/gitea/modules/redis/redis_key" - "code.gitea.io/gitea/modules/redis/redis_lock" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" "github.com/unknwon/com" @@ -53,6 +52,8 @@ const ( //NPU tplGrampusNotebookNPUNew base.TplName = "repo/grampus/notebook/npu/new" tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new" + //GCU + tplGrampusNotebookGCUNew base.TplName = "repo/grampus/notebook/gcu/new" ) func GrampusNotebookNew(ctx *context.Context) { @@ -61,6 +62,8 @@ func GrampusNotebookNew(ctx *context.Context) { processType := grampus.ProcessorTypeGPU if notebookType == 1 { processType = grampus.ProcessorTypeNPU + } else if notebookType == 2 { + processType = grampus.ProcessorTypeGCU } err := grampusNotebookNewDataPrepare(ctx, processType) if err != nil { @@ -69,8 +72,10 @@ func GrampusNotebookNew(ctx *context.Context) { } if processType == grampus.ProcessorTypeGPU { ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew) - } else { + } else if processType == grampus.ProcessorTypeNPU { ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew) + } else if processType == grampus.ProcessorTypeGCU { + ctx.HTML(http.StatusOK, tplGrampusNotebookGCUNew) } } @@ -117,15 +122,25 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook computeSource = models.NPUResource computeSourceSimple = models.NPU codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath - } + } else if form.Type == 2 { + tpl = tplGrampusNotebookGCUNew + processType = 
grampus.ProcessorTypeGCU + computeSource = models.GCUResource + computeSourceSimple = models.GCU + codeStoragePath = setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" + } + + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: string(models.JobTypeDebug)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) - defer lock.UnLock() - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) grampusNotebookNewDataPrepare(ctx, processType) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) + ctx.RenderWithErr(ctx.Tr(errMsg), tpl, &form) return } @@ -190,6 +205,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook var datasetInfos map[string]models.DatasetInfo var datasetNames string + var attachSize int64 //var if uuid != "" { datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid, computeSourceSimple) @@ -199,6 +215,21 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } + uuidArray := strings.Split(uuid, ";") + if datasetInfos == nil || len(datasetInfos) < len(uuidArray) { + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.partial_datasets_not_available"), tpl, &form) + return + } + for _, infos := range datasetInfos { + attachSize += infos.Size + } + if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%dGB)", 
setting.DebugAttachSize) // GB + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form) + return + } } //prepare code and out path @@ -215,7 +246,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook return } - if processType == grampus.ProcessorTypeGPU { + if processType == grampus.ProcessorTypeGPU || processType == grampus.ProcessorTypeGCU { if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusNotebookNewDataPrepare(ctx, processType) @@ -255,20 +286,26 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook if form.ModelName != "" { //使用预训练模型训练 - _, err := models.QueryModelByPath(form.PreTrainModelUrl) + m, err := models.QueryModelByPath(form.PreTrainModelUrl) if err != nil { log.Error("Can not find model", err) grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr(ctx.Tr("repo.modelconvert.manage.model_not_exist"), tpl, &form) return } + if !cloudbrainTask.IsModelFileExists(m, form.CkptName) { + log.Error("model file not exist.name = %s", form.CkptName) + grampusNotebookNewDataPrepare(ctx, processType) + ctx.RenderWithErr(ctx.Tr("repo.modelconvert.manage.model_file_not_exist"), tpl, &form) + return + } req.ModelName = form.ModelName req.LabelName = form.LabelName req.CkptName = form.CkptName req.ModelVersion = form.ModelVersion req.PreTrainModelUrl = form.PreTrainModelUrl req.PreTrainModelPath = getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) - + req.ModelStorageType = m.Type } _, err = grampus.GenerateNotebookJob(ctx, req) @@ -287,7 +324,7 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["display_job_name"] = displayJobName //get valid images - if processType == grampus.ProcessorTypeNPU { + if 
processType == grampus.ProcessorTypeNPU || processType == grampus.ProcessorTypeGCU { images, err := grampus.GetImages(processType, string(models.JobTypeDebug)) if err != nil { log.Error("GetImages failed:", err.Error()) @@ -303,6 +340,10 @@ func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) err computeResourceSimple = models.NPU datasetType = models.TypeCloudBrainTwo computeResource = models.NPUResource + } else if processType == grampus.ProcessorTypeGCU { + computeResourceSimple = models.GCU + datasetType = models.TypeCloudBrainAll + computeResource = models.GCUResource } prepareGrampusSpecs(ctx, computeResourceSimple, models.JobTypeDebug) @@ -476,19 +517,23 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain image := strings.TrimSpace(form.Image) tpl := tplGrampusTrainJobGPUNew - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeTrain), displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + if !jobNamePattern.MatchString(displayJobName) { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } - defer lock.UnLock() - if !jobNamePattern.MatchString(displayJobName) { + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: string(models.JobTypeTrain)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, 
&form) + ctx.RenderWithErr(ctx.Tr(errMsg), tpl, &form) return } @@ -724,19 +769,23 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain engineName := form.EngineName tpl := tplGrampusTrainJobNPUNew - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeTrain), displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + if !jobNamePattern.MatchString(displayJobName) { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } - defer lock.UnLock() - if !jobNamePattern.MatchString(displayJobName) { + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: string(models.JobTypeTrain)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) + ctx.RenderWithErr(ctx.Tr(errMsg), tpl, &form) return } @@ -1308,7 +1357,7 @@ func GrampusTrainJobShow(ctx *context.Context) { taskList := make([]*models.Cloudbrain, 0) taskList = append(taskList, task) prepareSpec4Show(ctx, task) - + ctx.Data["version_list_task"] = taskList ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) @@ -1349,9 +1398,9 @@ func GrampusGetLog(ctx *context.Context) { return } - content, err := grampus.GetTrainJobLog(job.JobID) + result, err := grampus.GetJob(jobID) if err != nil { - 
log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) + log.Error("GetJob(%s) failed:%v", job.JobName, err) ctx.JSON(http.StatusOK, map[string]interface{}{ "JobName": job.JobName, "Content": "", @@ -1359,20 +1408,26 @@ func GrampusGetLog(ctx *context.Context) { }) return } - result, err := grampus.GetJob(jobID) + exitDiagnostics := "" + if result != nil { + exitDiagnostics = result.ExitDiagnostics + } + + content, err := grampus.GetTrainJobLog(job.JobID) if err != nil { - log.Error("GetJob(%s) failed:%v", job.JobName, err) + log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) ctx.JSON(http.StatusOK, map[string]interface{}{ "JobName": job.JobName, - "Content": content, + "Content": exitDiagnostics, "CanLogDownload": false, }) return } + if result != nil { job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) if job.Status == models.GrampusStatusFailed { - content = content + "\n" + result.ExitDiagnostics + content = content + "\n" + exitDiagnostics } } @@ -1469,7 +1524,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo var commandCode string if processorType == grampus.ProcessorTypeNPU { paramCode += " --model_url=" + modelRemoteObsUrl - commandCode = "/bin/bash /home/work/run_train_for_openi.sh /home/work/openi.py " + grampus.NpuLocalLogUrl + paramCode + ";" + commandCode = "source /home/ma-user/.bashrc;python /home/ma-user/davinci/train/davincirun.py python /home/ma-user/openi.py " + paramCode + ";" } else if processorType == grampus.ProcessorTypeGPU { if pretrainModelFileName != "" { paramCode += " --ckpt_url" + "=" + workDir + "pretrainmodel/" + pretrainModelFileName @@ -1619,7 +1674,22 @@ func GrampusNotebookRestart(ctx *context.Context) { return } + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainRestart(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{JobType: task.JobType}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + 
}() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) + errorMsg = ctx.Tr(errMsg) + } + for { + if errorMsg != "" { + break + } if task.Status != models.GrampusStatusStopped && task.Status != models.GrampusStatusSucceeded && task.Status != models.GrampusStatusFailed { log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) @@ -1654,7 +1724,11 @@ func GrampusNotebookRestart(ctx *context.Context) { if task.ComputeResource == models.NPUResource { computeSourceSimple = models.NPU action = models.ActionCreateGrampusNPUDebugTask + } else if task.ComputeResource == models.GCUResource { + computeSourceSimple = models.GCU + action = models.ActionCreateGrampusGCUDebugTask } + spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{ JobType: models.JobType(task.JobType), ComputeResource: computeSourceSimple, @@ -1670,7 +1744,7 @@ func GrampusNotebookRestart(ctx *context.Context) { errorMsg = ctx.Tr("points.insufficient_points_balance") break } - if task.IsGPUTask() { + if task.IsGPUTask() || task.IsGCUTask() { if _, err := os.Stat(getOldJobPath(task)); err != nil { log.Error("Can not find job minio path", err) resultCode = "-1" diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index fe109422e..48536ae7e 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -2,6 +2,7 @@ package repo import ( "archive/zip" + "code.gitea.io/gitea/services/lock" "encoding/json" "errors" "fmt" @@ -35,8 +36,6 @@ import ( "code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/obs" - "code.gitea.io/gitea/modules/redis/redis_key" - "code.gitea.io/gitea/modules/redis/redis_lock" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/timeutil" @@ -176,15 +175,19 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm imageId := form.ImageId repo := 
ctx.Repo.Repository - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: string(models.JobTypeDebug)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) notebookNewDataPrepare(ctx) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplModelArtsNotebookNew, &form) + ctx.RenderWithErr(ctx.Tr(errMsg), tplModelArtsNotebookNew, &form) return } - defer lock.UnLock() count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug)) @@ -220,16 +223,16 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm } var datasetInfos map[string]models.DatasetInfo - var attachSize int + var attachSize int64 if uuid != "" { datasetInfos, _, err = models.GetDatasetInfo(uuid) for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int(setting.DebugAttachSize*1000*1000*1000) { - log.Error("The DatasetSize exceeds the limit (%d)", int(setting.DebugAttachSize)) //GB + if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) //GB notebookNewDataPrepare(ctx) - ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", int(setting.DebugAttachSize*1000*1000*1000)), tplModelArtsNotebookNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tplModelArtsNotebookNew, &form) return } } @@ -457,7 +460,7 @@ func 
NotebookDebug2(ctx *context.Context) { } if ctx.QueryTrim("file") != "" { - ctx.Redirect(getFileUrl(result.Url, ctx.QueryTrim("file")) + "?token=" + result.Token) + ctx.Redirect(getFileUrl(result.Url, ctx.QueryTrim("file")) + "&token=" + result.Token) } else { if task.BootFile != "" { go cloudbrainTask.UploadNotebookFiles(task) @@ -494,8 +497,21 @@ func NotebookRestart(ctx *context.Context) { var spec *models.Specification task := ctx.Cloudbrain + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainRestart(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{JobType: task.JobType}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) + errorMsg = ctx.Tr(errMsg) + } for { + if errMsg != "" { + break + } ctx.CheckWechatBind() if ctx.Written() { return @@ -1083,15 +1099,19 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeTrain), displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: string(models.JobTypeTrain)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) trainJobNewDataPrepare(ctx) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplModelArtsTrainJobNew, &form) + ctx.RenderWithErr(ctx.Tr(errMsg), tplModelArtsTrainJobNew, &form) return } - defer lock.UnLock() count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, 
models.TypeCloudBrainTwo, string(models.JobTypeTrain)) @@ -1461,6 +1481,20 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.Data["PageIsTrainJob"] = true var jobID = ctx.Params(":jobid") + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: form.DisplayJobName, JobType: string(models.JobTypeTrain)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) + trainJobNewVersionDataPrepare(ctx) + ctx.RenderWithErr(ctx.Tr(errMsg), tplModelArtsTrainJobVersionNew, &form) + return + } + errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber) if errStr != "" { trainJobNewVersionDataPrepare(ctx) @@ -1512,16 +1546,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ EngineName := form.EngineName isLatestVersion := modelarts.IsLatestVersion - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeTrain), displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) - trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplModelArtsTrainJobVersionNew, &form) - return - } - defer lock.UnLock() - canNewJob, _ := canUserCreateTrainJobVersion(ctx, latestTask.UserID) if !canNewJob { trainJobNewVersionDataPrepare(ctx) @@ -2109,15 +2133,19 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference return } - lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeInference), displayJobName)) - isOk, err := lock.Lock(models.CloudbrainKeyDuration) - if !isOk { - log.Error("lock processed failed:%v", err, 
ctx.Data["MsgID"]) + lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: displayJobName, JobType: string(models.JobTypeInference)}, User: ctx.User}) + defer func() { + if lockOperator != nil { + lockOperator.Unlock() + } + }() + + if errMsg != "" { + log.Error("lock processed failed:%s", errMsg, ctx.Data["MsgID"]) inferenceJobErrorNewDataPrepare(ctx, form) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplModelArtsInferenceJobNew, &form) + ctx.RenderWithErr(ctx.Tr(errMsg), tplModelArtsInferenceJobNew, &form) return } - defer lock.UnLock() count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeInference)) diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 85e1b8a25..31bba2aeb 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -359,6 +359,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/all/dosearch/", routers.SearchApi) m.Post("/user/login/kanban", user.SignInPostAPI) m.Get("/home/term", routers.HomeTerm) + m.Get("/home/annual_privacy", routers.HomeAnnual) m.Get("/home/notice", routers.HomeNoticeTmpl) m.Get("/home/privacy", routers.HomePrivacy) m.Get("/extension/tuomin/upload", modelapp.ProcessImageUI) @@ -1189,7 +1190,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/download_multi_model", cloudbrain.AdminOrJobCreaterRight, repo.CloudBrainDownloadMultiModel) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.CloudBrainNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), context.PointAccount(), repo.CloudBrainCreate) m.Group("/benchmark", func() { m.Get("", reqRepoCloudBrainReader, repo.CloudBrainBenchmarkIndex) @@ -1200,7 +1201,7 @@ 
func RegisterRoutes(m *macaron.Macaron) { m.Get("/rate", reqRepoCloudBrainReader, repo.GetRate) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.CloudBrainBenchmarkNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainBenchmarkCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), context.PointAccount(), repo.CloudBrainBenchmarkCreate) m.Get("/get_child_types", repo.GetChildTypes) }) @@ -1217,7 +1218,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainTrainJobVersionCreate) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.CloudBrainTrainJobNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), context.PointAccount(), repo.CloudBrainCreate) }) m.Group("/inference-job", func() { m.Group("/:jobid", func() { @@ -1227,7 +1228,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/downloadall", cloudbrain.AdminOrJobCreaterRightForTrain, repo.DownloadGPUInferenceResultFile) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.InferenceCloudBrainJobNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainInferencForm{}), repo.CloudBrainInferenceJobCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainInferencForm{}), context.PointAccount(), repo.CloudBrainInferenceJobCreate) }) }, context.RepoRef()) m.Group("/grampus", func() { @@ -1241,7 +1242,7 @@ func RegisterRoutes(m *macaron.Macaron) { }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, 
context.PointAccount(), repo.GrampusNotebookNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusNotebookForm{}), repo.GrampusNotebookCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusNotebookForm{}), context.PointAccount(), repo.GrampusNotebookCreate) }) m.Group("/train-job", func() { @@ -1256,11 +1257,11 @@ func RegisterRoutes(m *macaron.Macaron) { }) m.Group("/gpu", func() { m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusTrainJobGPUNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobGpuCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusTrainJobForm{}), context.PointAccount(), repo.GrampusTrainJobGpuCreate) }) m.Group("/npu", func() { m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusTrainJobNPUNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobNpuCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusTrainJobForm{}), context.PointAccount(), repo.GrampusTrainJobNpuCreate) }) }) }, context.RepoRef()) @@ -1309,7 +1310,7 @@ func RegisterRoutes(m *macaron.Macaron) { }) m.Get("/create_gpu", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.AiSafetyCreateForGetGPU) m.Get("/create_npu", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.AiSafetyCreateForGetNPU) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForPost) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.AiSafetyCreateForPost) }, context.RepoRef()) m.Group("/debugjob", func() { @@ -1326,7 +1327,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/del", 
cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.NotebookNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.Notebook2Create) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), context.PointAccount(), repo.Notebook2Create) }) m.Group("/train-job", func() { @@ -1339,10 +1340,10 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/download_multi_model", cloudbrain.AdminOrJobCreaterRightForTrain, repo.MultiModelDownload) m.Get("/download_log_file", cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobDownloadLogFile) m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, context.PointAccount(), repo.TrainJobNewVersion) - m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) + m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), context.PointAccount(), repo.TrainJobCreateVersion) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.TrainJobNew) - m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), context.PointAccount(), repo.TrainJobCreate) m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) }) @@ -1355,7 +1356,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/downloadall", cloudbrain.AdminOrJobCreaterRightForTrain, repo.DownloadMultiResultFile) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.InferenceJobNew) - m.Post("/create", reqWechatBind, 
reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), context.PointAccount(), repo.InferenceJobCreate) }) }, context.RepoRef()) diff --git a/routers/user/auth.go b/routers/user/auth.go index 9620cb969..61f53e200 100755 --- a/routers/user/auth.go +++ b/routers/user/auth.go @@ -276,7 +276,6 @@ func SignInPostAPI(ctx *context.Context) { func SignInPostCommon(ctx *context.Context, form auth.SignInForm) { ctx.Data["Title"] = ctx.Tr("sign_in") - orderedOAuth2Names, oauth2Providers, err := models.GetActiveOAuth2Providers() if err != nil { ctx.ServerError("UserSignIn", err) @@ -291,7 +290,6 @@ func SignInPostCommon(ctx *context.Context, form auth.SignInForm) { ctx.Data["IsCourse"] = ctx.QueryBool("course") ctx.Data["EnableSSPI"] = models.IsSSPIEnabled() ctx.Data["EnableCloudBrain"] = true - if ctx.HasError() { ctx.HTML(200, tplSignIn) return @@ -765,7 +763,6 @@ func handleSignInFull(ctx *context.Context, u *models.User, remember bool, obeyR } return redirectTo } - if obeyRedirect { ctx.Redirect(setting.AppSubURL + "/dashboard") } diff --git a/services/cloudbrain/cloudbrainTask/ai_model.go b/services/cloudbrain/cloudbrainTask/ai_model.go new file mode 100644 index 000000000..02cc392be --- /dev/null +++ b/services/cloudbrain/cloudbrainTask/ai_model.go @@ -0,0 +1,30 @@ +package cloudbrainTask + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/storage" +) + +func IsModelFileExists(model *models.AiModelManage, fileName string) bool { + if model.Type == models.TypeCloudBrainTwo { + key := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" + fileName + log.Info("IsModelFileExists TypeCloudBrainTwo key=%s", key) + isExist, err := storage.IsObjectExist4Obs(setting.Bucket, key) + if err != nil { + return false + 
} + return isExist + } else if model.Type == models.TypeCloudBrainOne { + prefix := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" + objectName := prefix + fileName + log.Info("IsModelFileExists TypeCloudBrainOne objectName=%s", objectName) + isExist, err := storage.IsObjectExist4Minio(setting.Attachment.Minio.Bucket, objectName) + if err != nil { + return false + } + return isExist + } + return false +} diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go index 172fa1502..186d52fb9 100644 --- a/services/cloudbrain/cloudbrainTask/count.go +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -16,7 +16,7 @@ type StatusInfo struct { var CloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} var CloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} -var GrampusNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} +var GrampusNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning, models.GrampusStatusPending} var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { CloudBrainTypes: []int{models.TypeCloudBrainOne}, JobType: []models.JobType{models.JobTypeDebug}, @@ -72,6 +72,11 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s JobType: []models.JobType{models.JobTypeDebug}, NotFinalStatuses: GrampusNotFinalStatuses, ComputeResource: models.NPUResource, +}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" 
+ models.GCUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: GrampusNotFinalStatuses, + ComputeResource: models.GCUResource, }} func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { @@ -87,7 +92,7 @@ func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, c if statusInfo, ok := StatusInfoDict[key]; ok { - return models.GetNotFinalStatusTaskCount(uid, statusInfo.NotFinalStatuses, statusInfo.JobType, statusInfo.CloudBrainTypes, statusInfo.ComputeResource) + return models.GetNotFinalStatusTaskCount(uid, statusInfo.NotFinalStatuses, statusInfo.JobType) } else { return 0, fmt.Errorf("Can not find the status info.") diff --git a/services/cloudbrain/lock.go b/services/cloudbrain/lock.go new file mode 100644 index 000000000..c9d6080f1 --- /dev/null +++ b/services/cloudbrain/lock.go @@ -0,0 +1,20 @@ +package cloudbrain + +import "code.gitea.io/gitea/services/lock" + +func Lock4CloudbrainCreation(ctx *lock.LockContext) (*lock.LockChainOperator, string) { + op := lock.NewLockChainOperator(ctx).Add(lock.CloudbrainUniquenessLock{}).Add(lock.CloudbrainDisplayJobNameLock{}) + errCode := op.Lock() + if errCode != "" { + return nil, errCode + } + return op, "" +} +func Lock4CloudbrainRestart(ctx *lock.LockContext) (*lock.LockChainOperator, string) { + op := lock.NewLockChainOperator(ctx).Add(lock.CloudbrainUniquenessLock{}) + errCode := op.Lock() + if errCode != "" { + return nil, errCode + } + return op, "" +} diff --git a/services/lock/cloudbrain_name_lock.go b/services/lock/cloudbrain_name_lock.go new file mode 100644 index 000000000..bed388e01 --- /dev/null +++ b/services/lock/cloudbrain_name_lock.go @@ -0,0 +1,31 @@ +package lock + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/redis/redis_key" + "code.gitea.io/gitea/modules/redis/redis_lock" + "fmt" +) 
+ +type CloudbrainDisplayJobNameLock struct { +} + +func (c CloudbrainDisplayJobNameLock) IsMatch(ctx *LockContext) bool { + return true +} + +func (c CloudbrainDisplayJobNameLock) Lock(ctx *LockContext) string { + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(ctx.Repo.ID), ctx.Task.JobType, ctx.Task.DisplayJobName)) + isOk, err := lock.Lock(models.CloudbrainKeyDuration) + if !isOk { + log.Error("CloudbrainDisplayJobNameLock lock failed:%v", err) + return "repo.cloudbrain_samejob_err" + } + return "" +} + +func (c CloudbrainDisplayJobNameLock) Unlock(ctx *LockContext) error { + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(ctx.Repo.ID), ctx.Task.JobType, ctx.Task.DisplayJobName)) + return lock.UnLock() +} diff --git a/services/lock/cloudbrain_uniqueness_lock.go b/services/lock/cloudbrain_uniqueness_lock.go new file mode 100644 index 000000000..8297e8b03 --- /dev/null +++ b/services/lock/cloudbrain_uniqueness_lock.go @@ -0,0 +1,30 @@ +package lock + +import ( + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/redis/redis_key" + "code.gitea.io/gitea/modules/redis/redis_lock" + "code.gitea.io/gitea/modules/setting" +) + +type CloudbrainUniquenessLock struct { +} + +func (c CloudbrainUniquenessLock) IsMatch(ctx *LockContext) bool { + return true +} + +func (c CloudbrainUniquenessLock) Lock(ctx *LockContext) string { + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainUniquenessKey(ctx.User.ID, ctx.Task.JobType)) + isOk, err := lock.Lock(setting.CloudbrainUniquenessLockTime) + if !isOk { + log.Error("CloudbrainUniquenessLock lock failed:%v", err) + return "repo.cloudbrain.morethanonejob" + } + return "" +} + +func (c CloudbrainUniquenessLock) Unlock(ctx *LockContext) error { + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainUniquenessKey(ctx.User.ID, ctx.Task.JobType)) + return lock.UnLock() +} diff --git a/services/lock/lock.go b/services/lock/lock.go new
file mode 100644 index 000000000..308cebd2a --- /dev/null +++ b/services/lock/lock.go @@ -0,0 +1,17 @@ +package lock + +import ( + "code.gitea.io/gitea/models" +) + +type LockContext struct { + Repo *models.Repository + Task *models.Cloudbrain + User *models.User +} + +type Lock interface { + IsMatch(ctx *LockContext) bool + Lock(ctx *LockContext) string + Unlock(ctx *LockContext) error +} diff --git a/services/lock/lock_operator.go b/services/lock/lock_operator.go new file mode 100644 index 000000000..cd2002f1c --- /dev/null +++ b/services/lock/lock_operator.go @@ -0,0 +1,42 @@ +package lock + +type LockChainOperator struct { + chainList []Lock + lockedList []Lock + ctx *LockContext +} + +func NewLockChainOperator(ctx *LockContext) *LockChainOperator { + return &LockChainOperator{ctx: ctx} +} + +func (b *LockChainOperator) Add(l Lock) *LockChainOperator { + b.chainList = append(b.chainList, l) + return b +} + +func (b *LockChainOperator) Lock() string { + for i := 0; i < len(b.chainList); i++ { + l := b.chainList[i] + if !l.IsMatch(b.ctx) { + continue + } + + if errCode := l.Lock(b.ctx); errCode != "" { + b.Unlock() + return errCode + } + b.lockedList = append(b.lockedList, l) + } + return "" +} + +func (b *LockChainOperator) Unlock() error { + if b.chainList == nil || len(b.chainList) == 0 { + return nil + } + for j := len(b.lockedList) - 1; j >= 0; j-- { + b.lockedList[j].Unlock(b.ctx) + } + return nil +} diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go index 7bac92ab8..2a752acf5 100755 --- a/services/socketwrap/clientManager.go +++ b/services/socketwrap/clientManager.go @@ -10,7 +10,7 @@ import ( "github.com/elliotchance/orderedmap" ) -var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40} +var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40, 41} type ClientsManager struct { 
Clients *orderedmap.OrderedMap diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl index 5040de2cc..91b0e913a 100755 --- a/templates/admin/cloudbrain/list.tmpl +++ b/templates/admin/cloudbrain/list.tmpl @@ -96,7 +96,7 @@ {{end}}
+ 感谢您阅读《OpenI启智社区2022年度报告授权协议》!在您正式使用OpenI启智社区2022年度报告之前应仔细阅读并充分理解本协议中的全部内容,如您不同意本协议中的任何条款,请立即停止使用OpenI启智社区2022年度报告。您使用OpenI启智社区2022年度报告的行为将被视为已经仔细阅读、充分理解并毫无保留地接受本协议所有条款。 +
+ ++ 1. 制作年度报告 +
++ OpenI启智社区2022年度报告将根据您在平台的历史信息,帮助您生成一份专属年度报告。为此,我们将使用2022自然年度您在OpenI启智社区产生的行为信息,包括但不限于用户名、注册时间、创建项目数、项目下载次数、commit次数、代码行数、创建数据集、上传数据集文件、数据集被收藏数、数据集下载数、云脑任务所有相关数据( GPU/NPU 调试任务、训练任务、运行卡时)。您理解并同意,上述信息是OpenI启智社区生成年度报告的必备信息,如您拒绝授权使用,OpenI启智社区将无法为您制作并提供专属年度报告。未经您的书面同意,我们保证不以超越本协议约定范围使用您的个人信息。 +
++ 2. 使用年度报告 +
++ OpenI启智社区提供的年度报告仅限您个人使用,您可自行留存欣赏或无偿分享、公开。您理解并同意,如因您分享、公开年度报告而产生的任何损失(包括但不限于个人信息泄露等)应由您自行承担,请您在分享、公开年度报告前审慎考虑。 +
++ 3. 知识产权 +
++ 年度报告及其内容(包括但不限于软件、技术、程序、网页、文字、图片、音频、视频、页面设计、商标等)的知识产权由OpenI启智社区享有,您理解并同意,您不得超越本协议目的使用年度报告中的内容素材,如您希望以任何形式将年度报告中的内容素材用于本协议约定范围之外,应当经过所有实际权利人的书面许可。 +
++ 4. 隐私政策 +
++ 其他本隐私保护指引未有涉及的,将适用《OpenI启智社区AI协作平台使用协议》和《OpenI启智社区AI协作平台隐私协议》。 +
+ +