package models

import (
	"encoding/json"
	"errors"
	"fmt"
	"path"
	"strconv"
	"strings"
	"time"

	"code.gitea.io/gitea/modules/util"

	"xorm.io/builder"
	"xorm.io/xorm"

	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/timeutil"
)

type CloudbrainStatus string
type JobType string
type ModelArtsJobStatus string

const (
	TypeCloudBrainOne int = iota
	TypeCloudBrainTwo
	TypeC2Net    // intelligent computing network (C2Net)
	TypeCDCenter // Chengdu intelligent computing center

	TypeCloudBrainAll = -1
)

const (
	NPUResource = "NPU"
	GPUResource = "CPU/GPU"
	AllResource = "all"

	//notebook storage category
	EVSCategory = "EVS"
	EFSCategory = "EFS"

	ManagedOwnership  = "MANAGED"
	DetectedOwnership = "DEDICATED"

	NotebookFeature = "NOTEBOOK"
	DefaultFeature  = "DEFAULT"

	JobWaiting   CloudbrainStatus = "WAITING"
	JobStopped   CloudbrainStatus = "STOPPED"
	JobSucceeded CloudbrainStatus = "SUCCEEDED"
	JobFailed    CloudbrainStatus = "FAILED"
	JobRunning   CloudbrainStatus = "RUNNING"

	ModelSafetyTesting CloudbrainStatus = "TESTING"

	JobTypeDebug        JobType = "DEBUG"
	JobTypeBenchmark    JobType = "BENCHMARK"
	JobTypeModelSafety  JobType = "MODELSAFETY"
	JobTypeSnn4imagenet JobType = "SNN4IMAGENET"
	JobTypeBrainScore   JobType = "BRAINSCORE"
	JobTypeTrain        JobType = "TRAIN"
	JobTypeInference    JobType = "INFERENCE"

	//notebook
	ModelArtsCreateQueue  ModelArtsJobStatus = "CREATE_QUEUING" // queued for creation (free resources)
	ModelArtsCreating     ModelArtsJobStatus = "CREATING"       // creating
	ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED"  // creation failed
	ModelArtsStartQueuing ModelArtsJobStatus = "START_QUEUING"  // queued for start (free resources)
	ModelArtsReadyToStart ModelArtsJobStatus = "READY_TO_START" // waiting to start (free resources)
	ModelArtsStarting     ModelArtsJobStatus = "STARTING"       // starting
	ModelArtsRestarting   ModelArtsJobStatus = "RESTARTING"     // restarting
	ModelArtsStartFailed  ModelArtsJobStatus = "START_FAILED"   // start failed
	ModelArtsRunning      ModelArtsJobStatus = "RUNNING"        // running
	ModelArtsStopping     ModelArtsJobStatus = "STOPPING"       // stopping
	ModelArtsStopped      ModelArtsJobStatus = "STOPPED"        // stopped
	ModelArtsUnavailable  ModelArtsJobStatus = "UNAVAILABLE"    // faulted
	ModelArtsDeleting     ModelArtsJobStatus = "DELETING"       // deleting
	ModelArtsDeleted      ModelArtsJobStatus = "DELETED"        // deleted
	ModelArtsResizing     ModelArtsJobStatus = "RESIZING"       // flavor change in progress
	ModelArtsResizFailed  ModelArtsJobStatus = "RESIZE_FAILED"  // flavor change failed

	//trainjob
	ModelArtsTrainJobUnknown               ModelArtsJobStatus = "UNKNOWN"                 // job status unknown
	ModelArtsTrainJobInit                  ModelArtsJobStatus = "INIT"                    // job initializing
	ModelArtsTrainJobImageCreating         ModelArtsJobStatus = "IMAGE_CREATING"          // job image being created
	ModelArtsTrainJobImageFailed           ModelArtsJobStatus = "IMAGE_FAILED"            // job image creation failed
	ModelArtsTrainJobSubmitTrying          ModelArtsJobStatus = "SUBMIT_TRYING"           // job being submitted
	ModelArtsTrainJobSubmitFailed          ModelArtsJobStatus = "SUBMIT_FAILED"           // job submission failed
	ModelArtsTrainJobDeleteFailed          ModelArtsJobStatus = "DELETE_FAILED"           // job deletion failed
	ModelArtsTrainJobWaiting               ModelArtsJobStatus = "WAITING"                 // job queued
	ModelArtsTrainJobRunning               ModelArtsJobStatus = "RUNNING"                 // job running
	ModelArtsTrainJobKilling               ModelArtsJobStatus = "KILLING"                 // job being cancelled
	ModelArtsTrainJobCompleted             ModelArtsJobStatus = "COMPLETED"               // job completed
	ModelArtsTrainJobFailed                ModelArtsJobStatus = "FAILED"                  // job run failed
	ModelArtsTrainJobKilled                ModelArtsJobStatus = "KILLED"                  // job cancelled successfully
	ModelArtsTrainJobCanceled              ModelArtsJobStatus = "CANCELED"                // job cancelled
	ModelArtsTrainJobLost                  ModelArtsJobStatus = "LOST"                    // job lost
	ModelArtsTrainJobScaling               ModelArtsJobStatus = "SCALING"                 // job scaling up
	ModelArtsTrainJobSubmitModelFailed     ModelArtsJobStatus = "SUBMIT_MODEL_FAILED"     // model submission failed
	ModelArtsTrainJobDeployServiceFailed   ModelArtsJobStatus = "DEPLOY_SERVICE_FAILED"   // service deployment failed
	ModelArtsTrainJobCheckInit             ModelArtsJobStatus = "CHECK_INIT"              // review job initializing
	ModelArtsTrainJobCheckRunning          ModelArtsJobStatus = "CHECK_RUNNING"           // review job running
	ModelArtsTrainJobCheckRunningCompleted ModelArtsJobStatus = "CHECK_RUNNING_COMPLETED" // review job completed
	ModelArtsTrainJobCheckFailed           ModelArtsJobStatus = "CHECK_FAILED"            // review job failed

	DURATION_STR_ZERO     = "00:00:00"
	CloudbrainKeyDuration = 24 * time.Hour

	//grampus
	GrampusStatusPending   = "pending"
	GrampusStatusRunning   = "RUNNING"
	GrampusStatusFailed    = "FAILED"
	GrampusStatusSucceeded = "SUCCEEDED"
	GrampusStatusStopped   = "STOPPED"
	GrampusStatusUnknown   = "UNKNOWN"
	GrampusStatusWaiting   = "WAITING"

	ModelSuffix = "models.zip"
)

const (
	//cluster
	OpenICluster = "OpenI"
	C2NetCluster = "C2Net"

	//AI center
	AICenterOfCloudBrainOne = "OpenIOne"
	AICenterOfCloudBrainTwo = "OpenITwo"
	AICenterOfChengdu       = "OpenIChengdu"

	//ComputeResource
	GPU = "GPU"
	NPU = "NPU"
)

type Cloudbrain struct {
	ID                    int64  `xorm:"pk autoincr"`
	JobID                 string `xorm:"INDEX NOT NULL"`
	JobType               string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
	JobName               string
	DisplayJobName        string
	Status                string
	UserID                int64 `xorm:"INDEX NOT NULL"`
	RepoID                int64 `xorm:"INDEX NOT NULL"`
	SubTaskName           string
	ContainerID           string
	ContainerIp           string
	CreatedUnix           timeutil.TimeStamp `xorm:"INDEX"`
	UpdatedUnix           timeutil.TimeStamp `xorm:"INDEX updated"`
	Duration              int64              `xorm:"DEFAULT 0"` // run duration in seconds
	TrainJobDuration      string             `xorm:"DEFAULT '00:00:00'"`
	Image                 string             // image name
	GpuQueue              string             // GPU type, i.e. the GPU queue
	ResourceSpecId        int                // GPU spec id
	DeletedAt             time.Time          `xorm:"deleted"`
	CanDebug              bool               `xorm:"-"`
	CanDel                bool               `xorm:"-"`
	CanModify             bool               `xorm:"-"`
	Type                  int                `xorm:"INDEX"`
	BenchmarkTypeID       int
	BenchmarkChildTypeID  int
	CardType              string
	Cluster               string
	VersionID             int64  // version id
	VersionName           string `xorm:"INDEX"` // current version
	Uuid                  string // dataset id
	DatasetName           string `xorm:"varchar(2000)"`
	VersionCount          int    // number of current versions of the task, excluding deleted ones
	IsLatestVersion       string // whether this is the latest version: 1 yes, 0 no
	CommitID              string // commit id of the submitted repository code
	PreVersionName        string // parent version name
	ComputeResource       string // compute resource, e.g. npu
	EngineID              int64  // engine id
	ImageID               string // grampus image_id
	AiCenter              string // grampus ai center: center_id+center_name
	TrainUrl              string // OBS path of the output model
	BranchName            string // branch name
	Parameters            string // param parameters passed to ModelArts
	BootFile              string // boot file
	DataUrl               string // OBS path of the dataset
	LogUrl                string // OBS path of the log output
	PreVersionId          int64  // version id of the parent version
	FlavorCode            string // flavor id on ModelArts
	Description           string `xorm:"varchar(256)"` // description
	WorkServerNumber      int    // number of nodes
	FlavorName            string // flavor name
	EngineName            string // engine name
	TotalVersionCount     int    // total number of versions of the task, including deleted ones
	LabelName             string // label name
	ModelName             string // model name
	ModelVersion          string // model version
	CkptName              string // checkpoint (weight) file name
	PreTrainModelUrl      string // pre-trained model address
	ResultUrl             string // OBS path of the inference result
	ResultJson            string `xorm:"varchar(4000)"`
	User                  *User       `xorm:"-"`
	Repo                  *Repository `xorm:"-"`
	BenchmarkType         string      `xorm:"-"` // algorithm benchmark, model benchmark
	BenchmarkTypeName     string      `xorm:"-"`
	BenchmarkTypeRankLink string      `xorm:"-"`
	StartTime             timeutil.TimeStamp
	EndTime               timeutil.TimeStamp
	Cleared               bool           `xorm:"DEFAULT false"`
	Spec                  *Specification `xorm:"-"`
}

type CloudbrainShow struct {
	ID               int64
	JobID            string
	RepoFullName     string
	Type             int
	JobType          string
	DisplayJobName   string
	Duration         string
	ResourceSpec     *Specification
	ComputeResource  string
	AiCenter         string
	WorkServerNumber int
}

type CloudbrainShow4Action struct {
	ID              int64
	JobID           string
	Type            int
	JobType         string
	DisplayJobName  string
	ComputeResource string
}

func (task *Cloudbrain) ToShow() *CloudbrainShow {
	n := 1
	if task.WorkServerNumber > 1 {
		n = task.WorkServerNumber
	}
	c := &CloudbrainShow{
		ID:               task.ID,
		JobID:            task.JobID,
		JobType:          task.JobType,
		Type:             task.Type,
		DisplayJobName:   task.DisplayJobName,
		Duration:         task.TrainJobDuration,
		ResourceSpec:     task.Spec,
		ComputeResource:  task.ComputeResource,
		WorkServerNumber: n,
	}
	if task.Repo != nil {
		c.RepoFullName = task.Repo.FullName()
	}
	return c
}

func (task *Cloudbrain) ComputeAndSetDuration() {
	var d int64
	if task.StartTime == 0 {
		d = 0
	} else if task.EndTime == 0 {
		if !task.IsTerminal() {
			d = time.Now().Unix() - task.StartTime.AsTime().Unix()
		}
	} else {
		d = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix()
	}
	if d < 0 {
		d = 0
	}
	task.Duration = d
	task.TrainJobDuration = ConvertDurationToStr(d)
}

func (task *Cloudbrain) CorrectCreateUnix() {
	if task.StartTime > 0 && task.CreatedUnix > task.StartTime {
		task.CreatedUnix = task.StartTime
	}
}

func (task *Cloudbrain) IsTerminal() bool {
	status := task.Status
	return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || status == string(ModelArtsTrainJobKilled) ||
		status == string(ModelArtsStopped) || status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded) ||
		status == GrampusStatusFailed || status == GrampusStatusSucceeded || status == GrampusStatusStopped
}

func (task *Cloudbrain) IsRunning() bool {
	status := task.Status
	return status == string(ModelArtsTrainJobRunning) || status == string(ModelArtsRunning) ||
		status == string(JobRunning) || status == GrampusStatusRunning
}

func (task *Cloudbrain) IsUserHasRight(user *User) bool {
	if user == nil {
		return false
	}
	return user.IsAdmin || user.ID == task.UserID
}

func ConvertDurationToStr(duration int64) string {
	if duration <= 0 {
		return DURATION_STR_ZERO
	}
	return util.AddZero(duration/3600) + ":" + util.AddZero(duration%3600/60) + ":" + util.AddZero(duration%60)
}

func ConvertStrToDuration(trainJobDuration string) int64 {
	trainJobDurationList := strings.Split(trainJobDuration, ":")
	if len(trainJobDurationList) == 3 {
		i, _ := strconv.ParseInt(trainJobDurationList[0], 10, 64)
		j, _ := strconv.ParseInt(trainJobDurationList[1], 10, 64)
		k, _ := strconv.ParseInt(trainJobDurationList[2], 10, 64)
		return i*3600 + j*60 + k
	} else {
		return 0
	}
}

func IsTrainJobTerminal(status string) bool {
	return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || status == string(ModelArtsTrainJobKilled) ||
		status == GrampusStatusFailed || status == GrampusStatusStopped || status == GrampusStatusSucceeded
}

func IsModelArtsDebugJobTerminal(status string) bool {
	return status == string(ModelArtsStopped)
}

func IsCloudBrainOneDebugJobTerminal(status string) bool {
	return status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded)
}

func ParseAndSetDurationFromCloudBrainOne(result JobResultPayload, task *Cloudbrain) {
	isActivated := result.JobStatus.CreatedTime > 0
	if task.StartTime == 0 && isActivated {
		task.StartTime = timeutil.TimeStamp(result.JobStatus.CreatedTime / 1000)
	}
	if task.EndTime == 0 && IsCloudBrainOneDebugJobTerminal(task.Status) && isActivated {
		if result.JobStatus.CompletedTime > 0 {
			task.EndTime = timeutil.TimeStamp(result.JobStatus.CompletedTime / 1000)
		}
	}
	task.CorrectCreateUnix()
	task.ComputeAndSetDuration()
}

func ParseAndSetDurationFromModelArtsNotebook(result *GetNotebook2Result, job *Cloudbrain) {
	if job.StartTime == 0 && result.Lease.UpdateTime > 0 {
		job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
	}
	job.Status = result.Status
	if job.EndTime == 0 && IsModelArtsDebugJobTerminal(job.Status) {
		job.EndTime = timeutil.TimeStampNow()
	}
	job.CorrectCreateUnix()
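	// The notebook API reports timestamps in UTC milliseconds (hence the /1000
	// conversion above); the duration is then recomputed from the corrected
	// start/end timestamps.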
job.ComputeAndSetDuration() } type CloudbrainInfo struct { Cloudbrain `xorm:"extends"` User `xorm:"extends"` } type CloudBrainLoginResult struct { Code string Msg string Payload map[string]interface{} } type TaskRole struct { Name string `json:"name"` TaskNumber int `json:"taskNumber"` MinSucceededTaskCount int `json:"minSucceededTaskCount"` MinFailedTaskCount int `json:"minFailedTaskCount"` CPUNumber int `json:"cpuNumber"` GPUNumber int `json:"gpuNumber"` MemoryMB int `json:"memoryMB"` ShmMB int `json:"shmMB"` Command string `json:"command"` NeedIBDevice bool `json:"needIBDevice"` IsMainRole bool `json:"isMainRole"` UseNNI bool `json:"useNNI"` } type StHostPath struct { Path string `json:"path"` MountPath string `json:"mountPath"` ReadOnly bool `json:"readOnly"` } type Volume struct { HostPath StHostPath `json:"hostPath"` } type CreateJobParams struct { JobName string `json:"jobName"` RetryCount int8 `json:"retryCount"` GpuType string `json:"gpuType"` Image string `json:"image"` TaskRoles []TaskRole `json:"taskRoles"` Volumes []Volume `json:"volumes"` } type CreateJobResult struct { Code string `json:"code"` Msg string `json:"msg"` Payload map[string]interface{} `json:"payload"` } type QueueDetailResult struct { Code string `json:"code"` Msg string `json:"msg"` Payload map[string]QueueDetail `json:"payload"` } type QueueDetail struct { JobScheduleInfo JobScheduleInfo `json:"JobScheduleInfo"` } type JobScheduleInfo struct { Pending int `json:"Pending"` Running int `json:"Running"` MedianPendingJobDurationSec int `json:"MedianPendingJobDurationSec"` } type GetJobResult struct { Code string `json:"code"` Msg string `json:"msg"` Payload map[string]interface{} `json:"payload"` } type GetImagesResult struct { Code string `json:"code"` Msg string `json:"msg"` Payload GetImagesPayload `json:"payload"` } type GetImagesPayload struct { Count int `json:"count"` TotalPages int `json:"totalPages,omitempty"` ImageInfo []*ImageInfo `json:"rows"` } type CloudbrainsOptions struct { ListOptions RepoID int64 // include all repos if empty UserID int64 JobID string SortType string CloudbrainIDs []int64 JobStatus []string JobStatusNot bool Keyword string Type int JobTypes []string VersionName string IsLatestVersion string JobTypeNot bool NeedRepoInfo bool RepoIDList []int64 BeginTime time.Time EndTime time.Time ComputeResource string BeginTimeUnix int64 EndTimeUnix int64 AiCenter string NeedDeleteInfo string Cluster string } type TaskPod struct { TaskRoleStatus struct { Name string `json:"name"` } `json:"taskRoleStatus"` //TaskStatuses []struct { // TaskIndex int `json:"taskIndex"` // PodUID string `json:"podUid"` // PodIP string `json:"podIp"` // PodName string `json:"podName"` // ContainerID string `json:"containerId"` // ContainerIP string `json:"containerIp"` // ContainerGpus string `json:"containerGpus"` // State string `json:"state"` // StartAt time.Time `json:"startAt"` // FinishedAt time.Time `json:"finishedAt"` // ExitCode int `json:"exitCode"` // ExitDiagnostics string `json:"exitDiagnostics"` // RetriedCount int `json:"retriedCount"` // StartTime string // FinishedTime string //} `json:"taskStatuses"` TaskStatuses []TaskStatuses `json:"taskStatuses"` } type TaskStatuses struct { TaskIndex int `json:"taskIndex"` PodUID string `json:"podUid"` PodIP string `json:"podIp"` PodName string `json:"podName"` ContainerID string `json:"containerId"` ContainerIP string `json:"containerIp"` ContainerGpus string `json:"containerGpus"` State string `json:"state"` StartAt time.Time `json:"startAt"` FinishedAt 
time.Time `json:"finishedAt"` ExitCode int `json:"exitCode"` ExitDiagnostics string `json:"exitDiagnostics"` RetriedCount int `json:"retriedCount"` StartTime string FinishedTime string } type TaskInfo struct { Username string `json:"username"` TaskName string `json:"task_name"` CodeName string `json:"code_name"` BenchmarkCategory []string `json:"selected_category"` CodeLink string `json:"code_link"` GpuType string `json:"gpu_type"` } func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) { data, _ := json.Marshal(input) var taskPod TaskPod err := json.Unmarshal(data, &taskPod) taskPod.TaskStatuses[0].StartTime = time.Unix(taskPod.TaskStatuses[0].StartAt.Unix()+8*3600, 0).UTC().Format("2006-01-02 15:04:05") taskPod.TaskStatuses[0].FinishedTime = time.Unix(taskPod.TaskStatuses[0].FinishedAt.Unix()+8*3600, 0).UTC().Format("2006-01-02 15:04:05") //if the task is not finished or stopped,the cloudbrain renturns 0001-01-01 08:00:00, the finishedTime shows with - if strings.HasPrefix(taskPod.TaskStatuses[0].FinishedTime, "0001") { taskPod.TaskStatuses[0].FinishedTime = "-" } return taskPod, err } type JobResultPayload struct { ID string `json:"id"` Name string `json:"name"` Platform string `json:"platform"` JobStatus struct { Username string `json:"username"` State string `json:"state"` SubState string `json:"subState"` ExecutionType string `json:"executionType"` Retries int `json:"retries"` CreatedTime int64 `json:"createdTime"` CompletedTime int64 `json:"completedTime"` AppID string `json:"appId"` AppProgress string `json:"appProgress"` AppTrackingURL string `json:"appTrackingUrl"` AppLaunchedTime int64 `json:"appLaunchedTime"` AppCompletedTime interface{} `json:"appCompletedTime"` AppExitCode int `json:"appExitCode"` AppExitDiagnostics string `json:"appExitDiagnostics"` AppExitType interface{} `json:"appExitType"` VirtualCluster string `json:"virtualCluster"` StartTime string EndTime string } `json:"jobStatus"` TaskRoles map[string]interface{} `json:"taskRoles"` Resource struct { CPU int `json:"cpu"` Memory string `json:"memory"` NvidiaComGpu int `json:"nvidia.com/gpu"` } `json:"resource"` Config struct { Image string `json:"image"` JobID string `json:"jobId"` GpuType string `json:"gpuType"` JobName string `json:"jobName"` JobType string `json:"jobType"` TaskRoles []struct { Name string `json:"name"` ShmMB int `json:"shmMB"` Command string `json:"command"` MemoryMB int `json:"memoryMB"` CPUNumber int `json:"cpuNumber"` GpuNumber int `json:"gpuNumber"` IsMainRole bool `json:"isMainRole"` TaskNumber int `json:"taskNumber"` NeedIBDevice bool `json:"needIBDevice"` MinFailedTaskCount int `json:"minFailedTaskCount"` MinSucceededTaskCount int `json:"minSucceededTaskCount"` } `json:"taskRoles"` RetryCount int `json:"retryCount"` } `json:"config"` Userinfo struct { User string `json:"user"` OrgID string `json:"org_id"` } `json:"userinfo"` } func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) { data, _ := json.Marshal(input) var jobResultPayload JobResultPayload err := json.Unmarshal(data, &jobResultPayload) jobResultPayload.JobStatus.StartTime = time.Unix(jobResultPayload.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05") jobResultPayload.JobStatus.EndTime = time.Unix(jobResultPayload.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05") if jobResultPayload.JobStatus.State == string(JobWaiting) { jobResultPayload.JobStatus.StartTime = "-" jobResultPayload.JobStatus.EndTime = "-" } return jobResultPayload, err } type ImagesResultPayload 
struct { Images []struct { ID int `json:"id"` Name string `json:"name"` Place string `json:"place"` Description string `json:"description"` Provider string `json:"provider"` Createtime string `json:"createtime"` Remark string `json:"remark"` } `json:"taskStatuses"` } type ImageInfo struct { ID int `json:"id"` Name string `json:"name"` Place string `json:"place"` Description string `json:"description"` Provider string `json:"provider"` Createtime string `json:"createtime"` Remark string `json:"remark"` IsPublic int `json:"isPublic"` PlaceView string } type Categories struct { Category []*Category `json:"category"` } type Category struct { Id int `json:"id"` Value string `json:"value"` } type BenchmarkTypes struct { BenchmarkType []*BenchmarkType `json:"type"` } type BenchmarkType struct { Id int `json:"id"` RankLink string `json:"rank_link"` First string `json:"first"` //一级算法类型名称 Second []*BenchmarkDataset `json:"second"` } type BenchmarkDataset struct { Id int `json:"id"` Value string `json:"value"` //二级算法类型名称 Attachment string `json:"attachment"` //数据集的uuid Owner string `json:"owner"` //评估脚本所在仓库的拥有者 RepoName string `json:"repo_name"` //评估脚本所在仓库的名称 } type GpuInfos struct { GpuInfo []*GpuInfo `json:"gpu_type"` } type GpuInfo struct { Id int `json:"id"` Value string `json:"value"` Queue string `json:"queue"` } type ResourceSpecs struct { ResourceSpec []*ResourceSpec `json:"resorce_specs"` } type ResourceSpec struct { Id int `json:"id"` CpuNum int `json:"cpu"` GpuNum int `json:"gpu"` MemMiB int `json:"memMiB"` ShareMemMiB int `json:"shareMemMiB"` UnitPrice int64 `json:"unitPrice"` } type FlavorInfos struct { FlavorInfo []*FlavorInfo `json:"flavor_info"` } type FlavorInfo struct { Id int `json:"id"` Value string `json:"value"` Desc string `json:"desc"` UnitPrice int64 `json:"unitPrice"` } type SpecialPools struct { Pools []*SpecialPool `json:"pools"` } type SpecialPool struct { Org string `json:"org"` Type string `json:"type"` IsExclusive bool `json:"isExclusive"` Pool []*GpuInfo `json:"pool"` JobType []string `json:"jobType"` ResourceSpec []*ResourceSpec `json:"resourceSpecs"` Flavor []*setting.FlavorInfo `json:"flavor"` } type PoolInfos struct { PoolInfo []*PoolInfo `json:"pool_info"` } type PoolInfo struct { PoolId string `json:"pool_id"` PoolName string `json:"pool_name"` PoolType string `json:"pool_type"` } type CommitImageCloudBrainParams struct { Ip string `json:"ip"` TaskContainerId string `json:"taskContainerId"` ImageTag string `json:"imageTag"` ImageDescription string `json:"imageDescription"` } type CommitImageParams struct { CommitImageCloudBrainParams IsPrivate bool Topics []string CloudBrainType int UID int64 Place string Type int } type CommitImageResult struct { Code string `json:"code"` Msg string `json:"msg"` Payload map[string]interface{} `json:"payload"` } type GetJobLogParams struct { Size string `json:"size"` Sort string `json:"sort"` QueryInfo QueryInfo `json:"query"` } type QueryInfo struct { MatchInfo MatchInfo `json:"match"` } type MatchInfo struct { PodName string `json:"kubernetes.pod.name"` } type GetJobLogResult struct { ScrollID string `json:"_scroll_id"` Took int `json:"took"` TimedOut bool `json:"timed_out"` Shards struct { Total int `json:"total"` Successful int `json:"successful"` Skipped int `json:"skipped"` Failed int `json:"failed"` } `json:"_shards"` Hits struct { Hits []Hits `json:"hits"` } `json:"hits"` } type Hits struct { Index string `json:"_index"` Type string `json:"_type"` ID string `json:"_id"` Source struct { Message string `json:"message"` } 
`json:"_source"` Sort []int `json:"sort"` } type GetAllJobLogParams struct { Scroll string `json:"scroll"` ScrollID string `json:"scroll_id"` } type DeleteJobLogTokenParams struct { ScrollID string `json:"scroll_id"` } type DeleteJobLogTokenResult struct { Succeeded bool `json:"succeeded"` NumFreed int `json:"num_freed"` } type CloudBrainResult struct { Code string `json:"code"` Msg string `json:"msg"` } type CreateNotebook2Params struct { JobName string `json:"name"` Description string `json:"description"` Duration int64 `json:"duration"` //ms Feature string `json:"feature"` PoolID string `json:"pool_id"` Flavor string `json:"flavor"` ImageID string `json:"image_id"` WorkspaceID string `json:"workspace_id"` Volume VolumeReq `json:"volume"` } type CreateNotebookWithoutPoolParams struct { JobName string `json:"name"` Description string `json:"description"` Duration int64 `json:"duration"` //ms Feature string `json:"feature"` Flavor string `json:"flavor"` ImageID string `json:"image_id"` WorkspaceID string `json:"workspace_id"` Volume VolumeReq `json:"volume"` } type VolumeReq struct { Capacity int `json:"capacity"` Category string `json:"category"` Ownership string `json:"ownership"` Uri string `json:"uri"` } type CreateNotebookParams struct { JobName string `json:"name"` Description string `json:"description"` ProfileID string `json:"profile_id"` Flavor string `json:"flavor"` Spec Spec `json:"spec"` Workspace Workspace `json:"workspace"` Pool Pool `json:"pool"` } type Pool struct { ID string `json:"id"` Name string `json:"name"` Type string `json:"type"` } type Workspace struct { ID string `json:"id"` } type Spec struct { Storage Storage `json:"storage"` AutoStop AutoStop `json:"auto_stop"` } type AutoStop struct { Enable bool `json:"enable"` Duration int `json:"duration"` } type Storage struct { Type string `json:"type"` Location Location `json:"location"` } type Location struct { Path string `json:"path"` } type NotebookResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` } type CreateNotebookResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` ID string `json:"id"` Name string `json:"name"` Description string `json:"description"` Status string `json:"status"` CreationTimestamp string `json:"creation_timestamp"` LatestUpdateTimestamp string `json:"latest_update_timestamp"` Profile struct { ID string `json:"id"` Name string `json:"name"` Description string `json:"description"` DeType string `json:"de_type"` FlavorType string `json:"flavor_type"` } `json:"profile"` Flavor string `json:"flavor"` FlavorDetails struct { Name string `json:"name"` Status string `json:"status"` QueuingNum int `json:"queuing_num"` QueueLeftTime int `json:"queue_left_time"` //s Duration int `json:"duration"` //auto_stop_time s } `json:"flavor_details"` } type GetNotebookResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` ID string `json:"id"` Name string `json:"name"` Description string `json:"description"` Status string `json:"status"` CreationTimestamp string `json:"creation_timestamp"` CreateTime string LatestUpdateTimestamp string `json:"latest_update_timestamp"` LatestUpdateTime string Profile struct { ID string `json:"id"` Name string `json:"name"` Description string `json:"description"` DeType string `json:"de_type"` FlavorType string `json:"flavor_type"` } `json:"profile"` Flavor string `json:"flavor"` FlavorDetails struct { Name string `json:"name"` Status string `json:"status"` QueuingNum int 
`json:"queuing_num"` QueueLeftTime int `json:"queue_left_time"` //s Duration int `json:"duration"` //auto_stop_time s } `json:"flavor_details"` QueuingInfo struct { ID string `json:"id"` Name string `json:"name"` Flavor string `json:"flavor"` DeType string `json:"de_type"` Status string `json:"status"` BeginTimestamp int `json:"begin_timestamp"` //time of instance begin in queue BeginTime string RemainTime int `json:"remain_time"` //remain time of instance EndTimestamp int `json:"end_timestamp"` // EndTime string Rank int `json:"rank"` //rank of instance in queue } `json:"queuing_info"` Spec struct { Annotations struct { TargetDomain string `json:"target_domain"` Url string `json:"url"` } `json:"annotations"` } `json:"spec"` } type GetNotebook2Result struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` FailReason string `json:"fail_reason"` ID string `json:"id"` Name string `json:"name"` Description string `json:"description"` Status string `json:"status"` Url string `json:"url"` //实例访问的URL Token string `json:"token"` //notebook鉴权使用的token信息 Flavor string `json:"flavor"` CreateTime string LatestUpdateTime string CreateAt int64 `json:"create_at"` //实例创建的时间,UTC毫秒 UpdateAt int64 `json:"update_at"` //实例最后更新(不包括保活心跳)的时间,UTC毫秒 Image struct { Name string `json:"name"` Status string `json:"status"` QueuingNum int `json:"queuing_num"` QueueLeftTime int `json:"queue_left_time"` //s Duration int `json:"duration"` //auto_stop_time s } `json:"image"` Lease struct { CreateTime int64 `json:"create_at"` //实例创建的时间,UTC毫秒 Duration int64 `json:"duration"` //实例运行时长,以创建时间为起点计算,即“创建时间+duration > 当前时刻”时,系统会自动停止实例 UpdateTime int64 `json:"update_at"` //实例最后更新(不包括保活心跳)的时间,UTC毫秒 } `json:"lease"` //实例自动停止的倒计时信息 VolumeRes struct { Capacity int `json:"capacity"` Category string `json:"category"` MountPath string `json:"mount_path"` Ownership string `json:"ownership"` Status string `json:"status"` } `json:"volume"` } type GetTokenParams struct { Auth Auth `json:"auth"` } type Auth struct { Identity Identity `json:"identity"` Scope Scope `json:"scope"` } type Scope struct { Project Project `json:"project"` } type Project struct { Name string `json:"name"` } type Identity struct { Methods []string `json:"methods"` Password Password `json:"password"` } type Password struct { User NotebookUser `json:"user"` } type NotebookUser struct { Name string `json:"name"` Password string `json:"password"` Domain Domain `json:"domain"` } type Domain struct { Name string `json:"name"` } const ( ActionStart = "start" ActionStop = "stop" ActionRestart = "restart" ActionQueue = "queue" ActionDequeue = "dequeue" ) type NotebookAction struct { Action string `json:"action"` } type NotebookActionResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` CurrentStatus string `json:"current_status"` PreviousState string `json:"previous_state"` Status string `json:"status"` } type NotebookGetJobTokenResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` Token string `json:"token"` } type NotebookDelResult struct { NotebookResult InstanceID string `json:"instance_id"` } type CreateUserImageTrainJobParams struct { JobName string `json:"job_name"` Description string `json:"job_desc"` Config UserImageConfig `json:"config"` WorkspaceID string `json:"workspace_id"` } type UserImageConfig struct { WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter 
[]Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` UserImageUrl string `json:"user_image_url"` UserCommand string `json:"user_command"` CreateVersion bool `json:"create_version"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` } type CreateTrainJobParams struct { JobName string `json:"job_name"` Description string `json:"job_desc"` Config Config `json:"config"` WorkspaceID string `json:"workspace_id"` } type Config struct { WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL EngineID int64 `json:"engine_id"` TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` //UserImageUrl string `json:"user_image_url"` //UserCommand string `json:"user_command"` CreateVersion bool `json:"create_version"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` } type CreateInferenceJobParams struct { JobName string `json:"job_name"` Description string `json:"job_desc"` InfConfig InfConfig `json:"config"` WorkspaceID string `json:"workspace_id"` } type CreateInfUserImageParams struct { JobName string `json:"job_name"` Description string `json:"job_desc"` Config InfUserImageConfig `json:"config"` WorkspaceID string `json:"workspace_id"` } type InfConfig struct { WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL EngineID int64 `json:"engine_id"` LogUrl string `json:"log_url"` CreateVersion bool `json:"create_version"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` } type InfUserImageConfig struct { WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL EngineID int64 `json:"engine_id"` LogUrl string `json:"log_url"` CreateVersion bool `json:"create_version"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` UserImageUrl string `json:"user_image_url"` UserCommand string `json:"user_command"` } type CreateTrainJobVersionParams struct { Description string `json:"job_desc"` Config TrainJobVersionConfig `json:"config"` } type CreateTrainJobVersionUserImageParams struct { Description string `json:"job_desc"` Config TrainJobVersionUserImageConfig `json:"config"` } type TrainJobVersionConfig struct { WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL EngineID int64 `json:"engine_id"` TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` PreVersionId int64 `json:"pre_version_id"` } type TrainJobVersionUserImageConfig struct { WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` 
//训练作业需要的数据集OBS路径URL TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` PreVersionId int64 `json:"pre_version_id"` UserImageUrl string `json:"user_image_url"` UserCommand string `json:"user_command"` } type CreateConfigParams struct { ConfigName string `json:"config_name"` Description string `json:"config_desc"` WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL EngineID int64 `json:"engine_id"` TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` } type Parameter struct { Label string `json:"label"` Value string `json:"value"` } type Parameters struct { Parameter []Parameter `json:"parameter"` } type Datasurl struct { DatasetUrl string `json:"dataset_url"` DatasetName string `json:"dataset_name"` } type DatasetDownload struct { DatasetName string `json:"dataset_name"` DatasetDownloadLink string `json:"dataset_download_link"` RepositoryLink string `json:"repository_link"` IsDelete bool `json:"is_delete"` } type DataSource struct { DatasetID string `json:"dataset_id"` DatasetVersion string `json:"dataset_version"` Type string `json:"type"` DataUrl string `json:"data_url"` } type Volumes struct { Nfs Nfs `json:"nfs"` HostPath HostPath `json:"host_path"` } type Nfs struct { ID string `json:"id"` SourcePath string `json:"src_path"` DestPath string `json:"dest_path"` ReadOnly bool `json:"read_only"` } type HostPath struct { SourcePath string `json:"src_path"` DestPath string `json:"dest_path"` ReadOnly bool `json:"read_only"` } type Flavor struct { Code string `json:"code"` } type CreateTrainJobResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` JobName string `json:"job_name"` JobID int64 `json:"job_id"` Status int `json:"status"` CreateTime int64 `json:"create_time"` VersionID int64 `json:"version_id"` ResourceID string `json:"resource_id"` VersionName string `json:"version_name"` } type CreateTrainJobConfigResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` } type GetResourceSpecsResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` SpecTotalCount int `json:"spec_total_count"` Specs []Specs `json:"specs"` } type Specs struct { Core string `json:"core"` Cpu string `json:"cpu"` IsNoResource bool `json:"no_resource"` GpuType string `json:"gpu_type"` SpecID int64 `json:"spec_id"` GpuNum int `json:"gpu_num"` SpecCode string `json:"spec_code"` Storage string `json:"storage"` MaxNum int `json:"max_num"` UnitNum int `json:"unit_num"` InterfaceType int `json:"interface_type"` } type GetConfigListResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` ConfigTotalCount int `json:"config_total_count"` ParaConfigs []ParaConfig `json:"configs"` } type ParaConfig struct { ConfigName string `json:"config_name"` ConfigDesc string `json:"config_desc"` CreateTime int64 `json:"create_time"` EngineType int `json:"engine_type"` EngineName string `json:"engine_name"` EngineId int64 `json:"engine_id"` EngineVersion string `json:"engine_version"` UserImageUrl 
string `json:"user_image_url"` UserCommand string `json:"user_command"` Result GetConfigResult } type GetConfigResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` ConfigName string `json:"config_name"` Description string `json:"config_desc"` WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL EngineID int64 `json:"engine_id"` TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` } type ErrorResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_message"` IsSuccess bool `json:"is_success"` } type GetTrainJobResult struct { IsSuccess bool `json:"is_success"` JobName string `json:"job_name"` JobID int64 `json:"job_id"` Description string `json:"job_desc"` IntStatus int `json:"status"` Status string LongCreateTime int64 `json:"create_time"` CreateTime string Duration int64 `json:"duration"` //训练作业的运行时间,单位为毫秒 TrainJobDuration string //训练作业的运行时间,格式为hh:mm:ss VersionID int64 `json:"version_id"` ResourceID string `json:"resource_id"` VersionName string `json:"version_name"` PreVersionID int64 `json:"pre_version_id"` WorkServerNum int `json:"worker_server_num"` AppUrl string `json:"app_url"` //训练作业的代码目录 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL EngineID int64 `json:"engine_id"` EngineName string `json:"engine_name"` EngineVersion string `json:"engine_version"` TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` Flavor Flavor `json:"flavor"` PoolID string `json:"pool_id"` PoolName string `json:"pool_name"` NasMountPath string `json:"nas_mount_path"` NasShareAddr string `json:"nas_share_addr"` DatasetName string ModelMetricList string `json:"model_metric_list"` //列表里包含f1_score,recall,precision,accuracy,若有的话 StartTime int64 `json:"start_time"` //训练作业开始时间。 } type GetTrainJobLogResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` Content string `json:"content"` Lines int `json:"lines"` StartLine string `json:"start_line"` EndLine string `json:"end_line"` } type GetTrainJobLogFileNamesResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` LogFileList []string `json:"log_file_list"` } type TrainJobResult struct { ErrorCode string `json:"error_code"` ErrorMsg string `json:"error_msg"` IsSuccess bool `json:"is_success"` } type LogFile struct { Name string } type JobList struct { JobName string `json:"job_name"` JobID int64 `json:"job_id"` VersionID int64 `json:"version_id"` VersionCount int64 `json:"version_count"` Description string `json:"job_desc"` IntStatus int `json:"status"` } type GetTrainJobListResult struct { ErrorResult JobTotalCount int `json:"job_total_count"` //查询到的用户创建作业总数 JobCountLimit int `json:"job_count_limit"` //用户还可以创建训练作业的数量 Quotas int `json:"quotas"` //训练作业的运行数量上限 JobList []JobList `json:"jobs"` } type JobVersionList struct { VersionName string `json:"version_name"` VersionID int64 `json:"version_id"` IntStatus int `json:"status"` } type GetTrainJobVersionListResult struct { ErrorResult JobID int64 `json:"job_id"` JobName 
string `json:"job_name"` JobDesc string `json:"job_desc"` VersionCount int64 `json:"version_count"` JobVersionList []JobVersionList `json:"versions"` } type NotebookList struct { JobName string `json:"name"` JobID string `json:"id"` Status string `json:"status"` } type GetNotebookListResult struct { TotalCount int64 `json:"total"` //总的记录数量 CurrentPage int `json:"current"` //当前页数 TotalPages int `json:"pages"` //总的页数 Size int `json:"size"` //每一页的数量 NotebookList []NotebookList `json:"data"` } //Grampus type GrampusResult struct { ErrorCode int `json:"errorCode"` ErrorMsg string `json:"errorMsg"` } type GrampusJobInfo struct { StartedAt int64 `json:"startedAt"` RunSec int64 `json:"runSec"` CompletedAt int64 `json:"completedAt"` CreatedAt int64 `json:"createdAt"` UpdatedAt int64 `json:"updatedAt"` Desc string `json:"desc"` JobID string `json:"id"` Name string `json:"name"` Status string `json:"status"` UserID string `json:"userId"` Tasks []GrampusTasks `json:"tasks"` } type Center struct { ID string `json:"id"` Name string `json:"name"` } type GrampusSpec struct { CreatedAt int64 `json:"createdAt"` UpdatedAt int64 `json:"updatedAt"` ID string `json:"id"` Name string `json:"name"` ProcessorType string `json:"processorType"` Centers []Center `json:"centers"` SpecInfo SpecInfo `json:"specInfo"` } type GrampusAiCenter struct { AccDevices []GrampusAccDevice `json:"accDevices"` Id string `json:"id"` Name string `json:"name"` Resource []GrampusCenterResource `json:"resource"` } type GrampusAccDevice struct { Kind string `json:"kind"` //加速卡类别, npu.huawei.com/NPU,nvidia.com/gpu,cambricon.com/mlu Model string `json:"model"` //加速卡型号 } type GrampusCenterResource struct { Allocated string `json:"allocated"` Capacity string `json:"capacity"` Name string `json:"name"` } type SpecInfo struct { AccDeviceKind string `json:"accDeviceKind"` AccDeviceMemory string `json:"accDeviceMemory"` AccDeviceModel string `json:"accDeviceModel"` AccDeviceNum int `json:"accDeviceNum"` CpuCoreNum int `json:"cpuCoreNum"` MemorySize string `json:"memorySize"` } type GetGrampusResourceSpecsResult struct { GrampusResult Infos []GrampusSpec `json:"resourceSpecs"` } type GetGrampusAiCentersResult struct { GrampusResult Infos []GrampusAiCenter `json:"aiCenterInfos"` TotalSize int `json:"totalSize"` } type GrampusImage struct { CreatedAt int64 `json:"createdAt"` UpdatedAt int64 `json:"updatedAt"` ID string `json:"id"` Name string `json:"name"` ProcessorType string `json:"processorType"` } type GetGrampusImagesResult struct { GrampusResult TotalSize int `json:"totalSize"` Infos []GrampusImage `json:"images"` } type CreateGrampusJobResponse struct { GrampusResult JobInfo GrampusJobInfo `json:"otJob"` } type GetGrampusJobResponse struct { GrampusResult JobInfo GrampusJobInfo `json:"otJob"` } type GrampusStopJobResponse struct { GrampusResult StoppedAt int64 `json:"stoppedAt"` } type GrampusTasks struct { Command string `json:"command"` Name string `json:"name"` ImageId string `json:"imageId"` ResourceSpecId string `json:"resourceSpecId"` ImageUrl string `json:"imageUrl"` CenterID []string `json:"centerID"` CenterName []string `json:"centerName"` ReplicaNum int `json:"replicaNum"` Datasets []GrampusDataset `json:"datasets"` Models []GrampusDataset `json:"models"` Code GrampusDataset `json:"code"` BootFile string `json:"bootFile"` } type GrampusDataset struct { Name string `json:"name"` Bucket string `json:"bucket"` EndPoint string `json:"endPoint"` ObjectKey string `json:"objectKey"` } type CreateGrampusJobRequest struct { Name string 
`json:"name"` Tasks []GrampusTasks `json:"tasks"` } type GetTrainJobMetricStatisticResult struct { TrainJobResult Interval int `json:"interval"` //查询的时间间隔,单位为分钟 MetricsInfo []Metrics `json:"metrics"` //监控详情 } type Metrics struct { Metric string `json:"metric"` //监控指标项 Value []string `json:"value"` //获取的监控值的序列,元素为String类型 } func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() if opts.RepoID > 0 { cond = cond.And( builder.Eq{"cloudbrain.repo_id": opts.RepoID}, ) } if opts.UserID > 0 { cond = cond.And( builder.Eq{"cloudbrain.user_id": opts.UserID}, ) } if (opts.JobID) != "" { cond = cond.And( builder.Eq{"cloudbrain.job_id": opts.JobID}, ) } if (opts.ComputeResource) != "" { cond = cond.And( builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource}, ) } if (opts.Type) >= 0 { cond = cond.And( builder.Eq{"cloudbrain.type": opts.Type}, ) } if len(opts.JobTypes) > 0 { if opts.JobTypeNot { cond = cond.And( builder.NotIn("cloudbrain.job_type", opts.JobTypes), ) } else { cond = cond.And( builder.In("cloudbrain.job_type", opts.JobTypes), ) } } if (opts.AiCenter) != "" { if opts.AiCenter == AICenterOfCloudBrainOne { cond = cond.And( builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, ) } else if opts.AiCenter == AICenterOfCloudBrainTwo { cond = cond.And( builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}, ) } else if opts.AiCenter == AICenterOfChengdu { cond = cond.And( builder.Eq{"cloudbrain.type": TypeCDCenter}, ) } else { cond = cond.And( builder.Like{"cloudbrain.ai_center", opts.AiCenter}, ) } } if (opts.Cluster) != "" { if opts.Cluster == "resource_cluster_openi" { cond = cond.And( builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}, builder.Eq{"cloudbrain.type": TypeCDCenter}), ) } if opts.Cluster == "resource_cluster_c2net" { cond = cond.And( builder.Eq{"cloudbrain.type": TypeC2Net}, ) } } if (opts.IsLatestVersion) != "" { cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"})) } if len(opts.CloudbrainIDs) > 0 { cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs)) } if len(opts.JobStatus) > 0 { if opts.JobStatusNot { cond = cond.And( builder.NotIn("cloudbrain.status", opts.JobStatus), ) } else { cond = cond.And( builder.In("cloudbrain.status", opts.JobStatus), ) } } if len(opts.RepoIDList) > 0 { cond = cond.And( builder.In("cloudbrain.repo_id", opts.RepoIDList), ) } var count int64 var err error condition := "cloudbrain.user_id = `user`.id" if len(opts.Keyword) == 0 { count, err = sess.Where(cond).Count(new(Cloudbrain)) } else { lowerKeyWord := strings.ToLower(opts.Keyword) cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord})) count, err = sess.Table(&Cloudbrain{}).Where(cond). Join("left", "`user`", condition).Count(new(CloudbrainInfo)) } if err != nil { return nil, 0, fmt.Errorf("Count: %v", err) } if opts.Page >= 0 && opts.PageSize > 0 { var start int if opts.Page == 0 { start = 0 } else { start = (opts.Page - 1) * opts.PageSize } sess.Limit(opts.PageSize, start) } sess.OrderBy("cloudbrain.created_unix DESC") cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) if err := sess.Table(&Cloudbrain{}).Where(cond). 
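	// Join `user` here as in the count query above, so the embedded User of
	// CloudbrainInfo is populated and the keyword filter can match the
	// creator's lower_name.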
Join("left", "`user`", condition). Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } if opts.NeedRepoInfo { var ids []int64 for _, task := range cloudbrains { ids = append(ids, task.RepoID) } repositoryMap, err := GetRepositoriesMapByIDs(ids) if err == nil { for _, task := range cloudbrains { task.Repo = repositoryMap[task.RepoID] } } } return cloudbrains, count, nil } func QueryModelTrainJobVersionList(jobId string) ([]*Cloudbrain, int, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() cond = cond.And( builder.Eq{"cloudbrain.job_id": jobId}, ) cond = cond.And( builder.In("cloudbrain.Status", "COMPLETED", "SUCCEEDED"), //builder.Eq{"cloudbrain.Status": "COMPLETED"}, ) sess.OrderBy("cloudbrain.created_unix DESC") cloudbrains := make([]*Cloudbrain, 0) if err := sess.Table(&Cloudbrain{}).Where(cond). Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } return cloudbrains, int(len(cloudbrains)), nil } func QueryModelTrainJobList(repoId int64) ([]*CloudbrainInfo, int, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() cond = cond.And( builder.Eq{"repo_id": repoId}, ) cond = cond.And( builder.In("Status", "COMPLETED", "SUCCEEDED"), ) cond = cond.And( builder.Eq{"job_type": "TRAIN"}, ) // cond = cond.And( // builder.In("type", 0, 1), // ) cloudbrains := make([]*CloudbrainInfo, 0) if err := sess.Select("job_id,display_job_name").Table(&Cloudbrain{}).Where(cond).OrderBy("created_unix DESC"). Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } keys := make(map[string]string) uniqueElements := make([]*CloudbrainInfo, 0) for _, entry := range cloudbrains { if _, value := keys[entry.JobID]; !value { keys[entry.JobID] = entry.DisplayJobName uniqueElements = append(uniqueElements, entry) } } return uniqueElements, int(len(uniqueElements)), nil } func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() if opts.RepoID > 0 { cond = cond.And( builder.Eq{"cloudbrain.repo_id": opts.RepoID}, ) } if opts.UserID > 0 { cond = cond.And( builder.Eq{"cloudbrain.user_id": opts.UserID}, ) } if (opts.Type) >= 0 { cond = cond.And( builder.Eq{"cloudbrain.type": opts.Type}, ) } if (opts.JobID) != "" { cond = cond.And( builder.Eq{"cloudbrain.job_id": opts.JobID}, ) } if len(opts.JobTypes) > 0 { cond = cond.And( builder.In("cloudbrain.job_type", opts.JobTypes), ) } if len(opts.CloudbrainIDs) > 0 { cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs)) } count, err := sess.Where(cond).Count(new(Cloudbrain)) if err != nil { return nil, 0, fmt.Errorf("Count: %v", err) } if opts.Page >= 0 && opts.PageSize > 0 { var start int if opts.Page == 0 { start = 0 } else { start = (opts.Page - 1) * opts.PageSize } sess.Limit(opts.PageSize, start) } sess.OrderBy("cloudbrain.created_unix DESC") cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) if err := sess.Table(&Cloudbrain{}).Where(cond). Join("left", "`user`", "cloudbrain.user_id = `user`.id"). 
Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } return cloudbrains, int(count), nil } func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) { session := x.NewSession() defer session.Close() err = session.Begin() cloudbrain.TrainJobDuration = DURATION_STR_ZERO if _, err = session.NoAutoTime().InsertOne(cloudbrain); err != nil { session.Rollback() return err } if cloudbrain.Spec != nil { if _, err = session.Insert(NewCloudBrainSpec(cloudbrain.ID, *cloudbrain.Spec)); err != nil { session.Rollback() return err } } session.Commit() go IncreaseDatasetUseCount(cloudbrain.Uuid) go OperateRepoAITaskNum(cloudbrain.RepoID, 1) return nil } func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) { has, err := x.Get(cb) if err != nil { return nil, err } else if !has { return nil, ErrJobNotExist{} } return cb, nil } func getRepoCloudBrainWithDeleted(cb *Cloudbrain) (*Cloudbrain, error) { has, err := x.Unscoped().Get(cb) if err != nil { return nil, err } else if !has { return nil, ErrJobNotExist{} } return cb, nil } func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) { cb := &Cloudbrain{JobID: jobID, RepoID: repoID} return getRepoCloudBrain(cb) } func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) { cb := &Cloudbrain{JobID: jobID} return getRepoCloudBrain(cb) } func GetCloudbrainByJobIDWithDeleted(jobID string) (*Cloudbrain, error) { cb := &Cloudbrain{JobID: jobID} return getRepoCloudBrainWithDeleted(cb) } func GetCloudbrainByID(id string) (*Cloudbrain, error) { idInt64, _ := strconv.ParseInt(id, 10, 64) cb := &Cloudbrain{ID: idInt64} return getRepoCloudBrain(cb) } func IsCloudbrainExistByJobName(jobName string)(bool,error){ return x.Unscoped().Exist(&Cloudbrain{ JobName: jobName, }) } func GetCloudbrainByIDWithDeleted(id string) (*Cloudbrain, error) { idInt64, _ := strconv.ParseInt(id, 10, 64) cb := &Cloudbrain{ID: idInt64} return getRepoCloudBrainWithDeleted(cb) } func GetCloudbrainByJobIDAndVersionName(jobID string, versionName string) (*Cloudbrain, error) { cb := &Cloudbrain{JobID: jobID, VersionName: versionName} return getRepoCloudBrain(cb) } func GetCloudbrainByJobIDAndIsLatestVersion(jobID string, isLatestVersion string) (*Cloudbrain, error) { cb := &Cloudbrain{JobID: jobID, IsLatestVersion: isLatestVersion} return getRepoCloudBrain(cb) } func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) { cloudBrains := make([]*Cloudbrain, 0) err := x.Cols("job_id", "status", "type", "job_type", "version_id", "start_time").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains) return cloudBrains, err } func GetModelartsReDebugTaskByJobId(jobID string) ([]*Cloudbrain, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() cond = cond.And( builder.Eq{"cloudbrain.job_id": jobID}, ) sess.OrderBy("cloudbrain.created_unix ASC limit 1") cloudbrains := make([]*Cloudbrain, 0, 10) if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond). Find(&cloudbrains); err != nil { log.Info("find error.") } return cloudbrains, nil } func GetCloudbrainsNeededStopByRepoID(repoID int64) ([]*Cloudbrain, error) { cloudBrains := make([]*Cloudbrain, 0) err := x.Cols("job_id", "status", "type", "job_type", "version_id", "start_time").Where("repo_id=? 
AND status !=?", repoID, string(JobStopped)).Find(&cloudBrains) return cloudBrains, err } func GetCloudbrainsNeededDeleteByRepoID(repoID int64) ([]*Cloudbrain, error) { cloudBrains := make([]*Cloudbrain, 0) err := x.Where("repo_id=?", repoID).Find(&cloudBrains) return cloudBrains, err } func GetCloudbrainsByDisplayJobName(repoID int64, jobType string, displayJobName string) ([]*Cloudbrain, error) { cloudBrains := make([]*Cloudbrain, 0) err := x.Cols("job_id", "job_name", "repo_id", "user_id", "job_type", "display_job_name").Where("repo_id=? AND job_type =? AND lower(display_job_name) = lower(?)", repoID, jobType, displayJobName).Find(&cloudBrains) return cloudBrains, err } func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) { cb := &Cloudbrain{JobID: jobID, Status: string(status)} _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb) return } func SetTrainJobStatusByJobID(jobID string, status string, duration int64, trainjobduration string) (err error) { cb := &Cloudbrain{JobID: jobID, Status: string(status), Duration: duration, TrainJobDuration: trainjobduration} _, err = x.Cols("status", "duration", "train_job_duration").Where("cloudbrain.job_id=?", jobID).Update(cb) return } func SetVersionCountAndLatestVersion(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) { cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount} _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) return } func UpdateJob(job *Cloudbrain) error { return updateJob(x, job) } func UpdateJobDurationWithDeleted(job *Cloudbrain) error { _, err := x.Exec("update cloudbrain set start_time=?, end_time=?,train_job_duration=?,duration=? where id=?", job.StartTime, job.EndTime, job.TrainJobDuration, job.Duration, job.ID) return err } func updateJob(e Engine, job *Cloudbrain) error { _, err := e.ID(job.ID).AllCols().Update(job) return err } func UpdateTrainJobVersion(job *Cloudbrain) error { return updateJobTrainVersion(x, job) } func updateJobTrainVersion(e Engine, job *Cloudbrain) error { var sess *xorm.Session sess = e.Where("job_id = ? 
AND version_name=?", job.JobID, job.VersionName) _, err := sess.Cols("status", "train_job_duration", "duration", "start_time", "end_time", "created_unix", "ai_center").Update(job) return err } func DeleteJob(job *Cloudbrain) error { return deleteJob(x, job) } func deleteJob(e Engine, job *Cloudbrain) error { _, err := e.ID(job.ID).Delete(job) if err == nil { go updateAITaskNumWhenDeleteJob(job) } return err } func updateAITaskNumWhenDeleteJob(job *Cloudbrain) { repoId := job.RepoID if repoId == 0 { t := &Cloudbrain{} _, tempErr := x.ID(job.ID).Unscoped().Get(t) if tempErr != nil { log.Error("updateAITaskNumWhenDeleteJob error.%v", tempErr) return } repoId = t.RepoID } if repoId > 0 { go OperateRepoAITaskNum(repoId, -1) } } func GetCloudbrainByName(jobName string) (*Cloudbrain, error) { cb := &Cloudbrain{JobName: jobName} return getRepoCloudBrain(cb) } func CanDelJob(isSigned bool, user *User, job *CloudbrainInfo) bool { if !isSigned || (job.Status != string(JobStopped) && job.Status != string(JobFailed) && job.Status != string(ModelArtsStartFailed) && job.Status != string(ModelArtsCreateFailed)) { return false } repo, err := GetRepositoryByID(job.RepoID) if err != nil { log.Error("GetRepositoryByID failed:%v", err.Error()) return false } permission, _ := GetUserRepoPermission(repo, user) if err != nil { log.Error("GetUserRepoPermission failed:%v", err.Error()) return false } if (user.ID == job.UserID && permission.AccessMode >= AccessModeWrite) || user.IsAdmin || permission.AccessMode >= AccessModeAdmin { return true } return false } func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) return cloudbrains, x. NotIn("status", JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted, ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed, ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed). Limit(100). Find(&cloudbrains) } func GetCloudBrainOneStoppedNotDebugJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0, 10) endTimeBefore := time.Now().Unix() - int64(days)*24*3600 missEndTimeBefore := endTimeBefore - 24*3600 return cloudbrains, x.Unscoped().Cols("id,job_name,job_id"). In("status", JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted, ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed, ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed). 
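	// Only jobs already in a terminal status qualify for clearing; the time
	// filter below appears to use end_time, falling back to updated_unix when
	// end_time was never recorded.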
Where("(((end_time is null or end_time=0) and updated_unix 0 { idsIn := "" for i, id := range tempIds { if i == 0 { idsIn += strconv.FormatInt(id, 10) } else { idsIn += "," + strconv.FormatInt(id, 10) } } _, errTemp := x.Unscoped().Exec("update cloudbrain set cleared=true where id in (" + idsIn + ")") if errTemp != nil { err = errTemp } } } return err } func getPageIds(ids []int64, page int, pagesize int) []int64 { begin := (page - 1) * pagesize end := (page) * pagesize if begin > len(ids)-1 { return []int64{} } if end > len(ids)-1 { return ids[begin:] } else { return ids[begin:end] } } func GetStoppedJobWithNoDurationJob() ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0) return cloudbrains, x. In("status", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded). Where("train_job_duration is null or train_job_duration = '' "). Limit(100). Find(&cloudbrains) } func GetStoppedJobWithNoStartTimeEndTime() ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0) return cloudbrains, x.SQL("select * from cloudbrain where status in (?,?,?,?,?,?,?) and (start_time is null or end_time is null) limit 100", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded).Find(&cloudbrains) } func GetC2NetWithAiCenterWrongJob() ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0) return cloudbrains, x. In("status", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded). Where("type = ?", TypeC2Net). Find(&cloudbrains) } func GetModelSafetyTestTask() ([]*Cloudbrain, error) { cloudbrains := make([]*Cloudbrain, 0) sess := x.Where("job_type=?", string(JobTypeModelSafety)) err := sess.Find(&cloudbrains) return cloudbrains, err } func GetCloudbrainRunCountByRepoID(repoID int64) (int, error) { count, err := x.In("status", JobWaiting, JobRunning, ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsDeleting, ModelArtsRestarting, ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsStopping, ModelArtsResizing, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobKilling, ModelArtsTrainJobCheckRunningCompleted).And("repo_id = ?", repoID).Count(new(Cloudbrain)) return int(count), err } func GetModelSafetyCountByUserID(userID int64) (int, error) { count, err := x.In("status", JobWaiting, JobRunning, ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted).And("job_type = ? and user_id = ?", string(JobTypeModelSafety), userID).Count(new(Cloudbrain)) return int(count), err } func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTypes ...JobType) (int64, error) { sess := x.Where("status=? 
and type=?", JobWaiting, cloudbrainType) if len(jobTypes) > 0 { sess.In("job_type", jobTypes) } if computeResource != "" { sess.And("compute_resource=?", computeResource) } return sess.Count(new(Cloudbrain)) } func GetNotFinalStatusTaskCount(userID int64, notFinalStatus []string, jobTypes []JobType, cloudbrainTypes []int, computeResource string) (int, error) { count, err := x.In("status", notFinalStatus). In("job_type", jobTypes). In("type", cloudbrainTypes). And("user_id = ? and compute_resource = ?", userID, computeResource).Count(new(Cloudbrain)) return int(count), err } func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { sess := x.NewSession() defer sess.Close() if err = sess.Begin(); err != nil { return err } if _, err = sess.Delete(old); err != nil { sess.Rollback() return err } if _, err = sess.NoAutoTime().InsertOne(new); err != nil { sess.Rollback() return err } if new.Spec != nil { if _, err = sess.Insert(NewCloudBrainSpec(new.ID, *new.Spec)); err != nil { sess.Rollback() return err } } if err = sess.Commit(); err != nil { return err } go IncreaseDatasetUseCount(new.Uuid) return nil } func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() if opts.RepoID > 0 { cond = cond.And( builder.Eq{"cloudbrain.repo_id": opts.RepoID}, ) } if opts.UserID > 0 { cond = cond.And( builder.Eq{"cloudbrain.user_id": opts.UserID}, ) } if (opts.JobID) != "" { cond = cond.And( builder.Eq{"cloudbrain.job_id": opts.JobID}, ) } if (opts.ComputeResource) != "" { cond = cond.And( builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource}, ) } if (opts.Type) >= 0 { cond = cond.And( builder.Eq{"cloudbrain.type": opts.Type}, ) } if len(opts.JobTypes) > 0 { if opts.JobTypeNot { cond = cond.And( builder.NotIn("cloudbrain.job_type", opts.JobTypes), ) } else { cond = cond.And( builder.In("cloudbrain.job_type", opts.JobTypes), ) } } if (opts.AiCenter) != "" { cond = cond.And( builder.Like{"cloudbrain.ai_center", opts.AiCenter}, ) } if (opts.NeedDeleteInfo) != "" { if opts.NeedDeleteInfo == "yes" { cond = cond.And( builder.And(builder.NotNull{"cloudbrain.deleted_at"}), ) } if opts.NeedDeleteInfo == "no" { cond = cond.And( builder.And(builder.IsNull{"cloudbrain.deleted_at"}), ) } } if (opts.IsLatestVersion) != "" { cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"})) } if len(opts.CloudbrainIDs) > 0 { cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs)) } if len(opts.JobStatus) > 0 { if opts.JobStatusNot { cond = cond.And( builder.NotIn("cloudbrain.status", opts.JobStatus), ) } else { cond = cond.And( builder.In("cloudbrain.status", opts.JobStatus), ) } } if len(opts.RepoIDList) > 0 { cond = cond.And( builder.In("cloudbrain.repo_id", opts.RepoIDList), ) } if opts.BeginTimeUnix > 0 && opts.EndTimeUnix > 0 { cond = cond.And( builder.And(builder.Gte{"cloudbrain.created_unix": opts.BeginTimeUnix}, builder.Lte{"cloudbrain.created_unix": opts.EndTimeUnix}), ) } var count int64 var err error condition := "cloudbrain.user_id = `user`.id" if len(opts.Keyword) == 0 { count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain)) } else { lowerKeyWord := strings.ToLower(opts.Keyword) cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, 
builder.Like{"`user`.lower_name", lowerKeyWord})) count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). Join("left", "`user`", condition).Count(new(CloudbrainInfo)) } if err != nil { return nil, 0, fmt.Errorf("Count: %v", err) } if opts.Page >= 0 && opts.PageSize > 0 { var start int if opts.Page == 0 { start = 0 } else { start = (opts.Page - 1) * opts.PageSize } sess.Limit(opts.PageSize, start) } sess.OrderBy("cloudbrain.created_unix DESC") cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond). Join("left", "`user`", condition). Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } if opts.NeedRepoInfo { var ids []int64 for _, task := range cloudbrains { ids = append(ids, task.RepoID) } repositoryMap, err := GetRepositoriesMapByIDs(ids) if err == nil { for _, task := range cloudbrains { task.Repo = repositoryMap[task.RepoID] } } } return cloudbrains, count, nil } func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() if (opts.Type) >= 0 { cond = cond.And( builder.Eq{"cloudbrain.type": opts.Type}, ) } if opts.BeginTimeUnix > 0 && opts.EndTimeUnix > 0 { cond = cond.And( builder.And(builder.Gte{"cloudbrain.created_unix": opts.BeginTimeUnix}, builder.Lte{"cloudbrain.created_unix": opts.EndTimeUnix}), ) } var count int64 var err error count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain)) if err != nil { return nil, 0, fmt.Errorf("Count: %v", err) } if opts.Page >= 0 && opts.PageSize > 0 { var start int if opts.Page == 0 { start = 0 } else { start = (opts.Page - 1) * opts.PageSize } sess.Limit(opts.PageSize, start) } // sess.OrderBy("cloudbrain.created_unix DESC") cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) if err := sess.Table(&Cloudbrain{}).Unscoped().Where(cond). Find(&cloudbrains); err != nil { return nil, 0, fmt.Errorf("Find: %v", err) } if opts.NeedRepoInfo { var ids []int64 for _, task := range cloudbrains { ids = append(ids, task.RepoID) } repositoryMap, err := GetRepositoriesMapByIDs(ids) if err == nil { for _, task := range cloudbrains { task.Repo = repositoryMap[task.RepoID] } } } return cloudbrains, count, nil } func CloudbrainAllKanBan(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { sess := x.NewSession() defer sess.Close() var cond = builder.NewCond() if (opts.Type) >= 0 { cond = cond.And( builder.Eq{"cloudbrain.type": opts.Type}, ) } if opts.BeginTimeUnix > 0 && opts.EndTimeUnix > 0 { cond = cond.And( builder.And(builder.Gte{"cloudbrain.created_unix": opts.BeginTimeUnix}, builder.Lte{"cloudbrain.created_unix": opts.EndTimeUnix}), ) } var count int64 var err error count, err = sess.Unscoped().Where(cond).Count(new(Cloudbrain)) if err != nil { return nil, 0, fmt.Errorf("Count: %v", err) } if opts.Page >= 0 && opts.PageSize > 0 { var start int if opts.Page == 0 { start = 0 } else { start = (opts.Page - 1) * opts.PageSize } sess.Limit(opts.PageSize, start) } // sess.OrderBy("cloudbrain.created_unix DESC") cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) if err := sess.Cols("id", "type", "work_server_number", "duration", "train_job_duration", "ai_center", "cluster").Table(&Cloudbrain{}).Unscoped().Where(cond). 
func GetStartedCloudbrainTaskByUpdatedUnix(startTime, endTime time.Time) ([]Cloudbrain, error) {
    r := make([]Cloudbrain, 0)
    err := x.Where("updated_unix >= ? and updated_unix <= ? and start_time > 0", startTime.Unix(), endTime.Unix()).Unscoped().Find(&r)
    if err != nil {
        return nil, err
    }
    return r, nil
}

func GetCloudbrainByIds(ids []int64) ([]*Cloudbrain, error) {
    if len(ids) == 0 {
        return nil, nil
    }
    cloudbrains := make([]*Cloudbrain, 0)
    err := x.In("id", ids).Unscoped().Find(&cloudbrains)
    if err != nil {
        return nil, err
    }
    return cloudbrains, nil
}

type DatasetInfo struct {
    DataLocalPath string
    Name          string
    FullName      string
}

func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) {
    var datasetNames string
    uuids := strings.Split(uuidStr, ";")
    if len(uuids) > setting.MaxDatasetNum {
        log.Error("the dataset count(%d) exceed the limit", len(uuids))
        return nil, datasetNames, errors.New("the dataset count exceed the limit")
    }
    datasetInfos := make(map[string]DatasetInfo)
    attachs, err := GetAttachmentsByUUIDs(uuids)
    if err != nil {
        log.Error("GetAttachmentsByUUIDs failed: %v", err)
        return nil, datasetNames, err
    }
    for i, tmpUuid := range uuids {
        var attach *Attachment
        for _, tmpAttach := range attachs {
            if tmpAttach.UUID == tmpUuid {
                attach = tmpAttach
                break
            }
        }
        if attach == nil {
            // return an explicit error here; the original returned the (nil) err of the earlier query
            log.Error("the attachment of uuid %s does not exist", tmpUuid)
            return nil, datasetNames, errors.New("the dataset attachment does not exist")
        }
        fileName := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(attach.Name, ".zip"), ".tar.gz"), ".tgz")
        for _, datasetInfo := range datasetInfos {
            if fileName == datasetInfo.Name {
                log.Error("the dataset name is same: %v", attach.Name)
                return nil, datasetNames, errors.New("the dataset name is same")
            }
        }
        var dataLocalPath string
        if len(grampusType) > 0 {
            if grampusType[0] == GPU {
                dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID
            } else {
                dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/"
            }
        } else {
            dataLocalPath = setting.Attachment.Minio.RealPath +
                setting.Attachment.Minio.Bucket + "/" +
                setting.Attachment.Minio.BasePath +
                AttachmentRelativePath(attach.UUID) +
                attach.UUID
        }
        datasetInfos[attach.UUID] = DatasetInfo{
            DataLocalPath: dataLocalPath,
            Name:          fileName,
            FullName:      attach.Name,
        }
        if i == 0 {
            datasetNames = attach.Name
        } else {
            datasetNames += ";" + attach.Name
        }
    }
    return datasetInfos, datasetNames, nil
}

var (
    SpecsMapInitFlag                    = false
    CloudbrainDebugResourceSpecsMap     map[int]*ResourceSpec
    CloudbrainTrainResourceSpecsMap     map[int]*ResourceSpec
    CloudbrainInferenceResourceSpecsMap map[int]*ResourceSpec
    CloudbrainBenchmarkResourceSpecsMap map[int]*ResourceSpec
    CloudbrainSpecialResourceSpecsMap   map[int]*ResourceSpec

    GpuInfosMapInitFlag            = false
    CloudbrainDebugGpuInfosMap     map[string]*GpuInfo
    CloudbrainTrainGpuInfosMap     map[string]*GpuInfo
    CloudbrainInferenceGpuInfosMap map[string]*GpuInfo
    CloudbrainBenchmarkGpuInfosMap map[string]*GpuInfo
    CloudbrainSpecialGpuInfosMap   map[string]*GpuInfo
)
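// resolveDatasetPathsExample is an illustrative sketch, not part of the original code.
// It shows how GetDatasetInfo is typically consumed when preparing a task: the returned
// map is keyed by attachment UUID and each entry carries the local storage path that a
// container would mount, while the second return value is the ";"-joined display names.
func resolveDatasetPathsExample(uuidStr string) ([]string, error) {
    datasetInfos, datasetNames, err := GetDatasetInfo(uuidStr)
    if err != nil {
        return nil, err
    }
    log.Info("selected datasets: %s", datasetNames)
    paths := make([]string, 0, len(datasetInfos))
    for uuid, info := range datasetInfos {
        paths = append(paths, uuid+" -> "+info.DataLocalPath)
    }
    return paths, nil
}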
func GetNewestJobsByAiCenter() ([]int64, error) {
    ids := make([]int64, 0)
    return ids, x.
        Select("max(id) as id").
        Where("type=? and ai_center!='' and ai_center is not null", TypeC2Net).
        GroupBy("ai_center").
        Table(Cloudbrain{}).
        Find(&ids)
}

func GetNewestJobsByType() ([]int64, error) {
    ids := make([]int64, 0)
    return ids, x.
        Select("max(id) as id").
        In("type", TypeCloudBrainOne, TypeCloudBrainTwo).
        GroupBy("type").
        Table(Cloudbrain{}).
        Find(&ids)
}

func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) {
    cloudbrains := make([]*Cloudbrain, 0)
    return cloudbrains, x.
        In("id", ids).
        Find(&cloudbrains)
}

func GetCloudbrainWithDeletedByIDs(ids []int64) ([]*Cloudbrain, error) {
    cloudbrains := make([]*Cloudbrain, 0)
    return cloudbrains, x.
        In("id", ids).Unscoped().Find(&cloudbrains)
}

func GetCloudbrainCountByJobName(jobName, jobType string, typeCloudbrain int) (int, error) {
    count, err := x.Where("job_name = ? and job_type= ? and type = ?", jobName, jobType, typeCloudbrain).Count(new(Cloudbrain))
    return int(count), err
}

func LoadSpecs(tasks []*Cloudbrain) error {
    cloudbrainIds := make([]int64, len(tasks))
    for i, v := range tasks {
        cloudbrainIds[i] = v.ID
    }
    specs := make([]*CloudbrainSpec, 0)
    err := x.In("cloudbrain_id", cloudbrainIds).Find(&specs)
    if err != nil {
        return err
    }
    specMap := make(map[int64]*CloudbrainSpec)
    for _, v := range specs {
        // key by the cloudbrain id (not the spec id) so the lookup below by task id matches
        specMap[v.CloudbrainID] = v
    }
    for _, v := range tasks {
        if specMap[v.ID] != nil {
            v.Spec = specMap[v.ID].ConvertToSpecification()
        }
    }
    return nil
}

func LoadSpecs4CloudbrainInfo(tasks []*CloudbrainInfo) error {
    cloudbrainIds := make([]int64, len(tasks))
    for i, v := range tasks {
        cloudbrainIds[i] = v.Cloudbrain.ID
    }
    specs := make([]*CloudbrainSpec, 0)
    err := x.In("cloudbrain_id", cloudbrainIds).Find(&specs)
    if err != nil {
        return err
    }
    specMap := make(map[int64]*CloudbrainSpec)
    for _, v := range specs {
        specMap[v.CloudbrainID] = v
    }
    for _, v := range tasks {
        if specMap[v.Cloudbrain.ID] != nil {
            v.Cloudbrain.Spec = specMap[v.Cloudbrain.ID].ConvertToSpecification()
        }
    }
    return nil
}
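// loadTasksWithSpecsExample is an illustrative sketch, not part of the original code.
// It shows the intended batch pattern: fetch a set of tasks by id and then hydrate all
// of their resource specifications with a single extra query via LoadSpecs, instead of
// querying the spec table once per task.
func loadTasksWithSpecsExample(ids []int64) ([]*Cloudbrain, error) {
    tasks, err := GetCloudbrainByIDs(ids)
    if err != nil {
        return nil, err
    }
    if err := LoadSpecs(tasks); err != nil {
        return nil, err
    }
    return tasks, nil
}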