// Package modelarts creates notebook (debug) tasks and train jobs through the
// package's CreateJob / createTrainJob helpers and records each created job
// via models.CreateCloudbrain.
package modelarts

import (
	"encoding/json"
	"errors"
	"path"
	"strconv"

	"code.gitea.io/gitea/models"
	"code.gitea.io/gitea/modules/context"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/storage"
)

const (
	// Notebook settings used by GenerateTask. autoStopDuration is sent as the
	// notebook's auto-stop duration; presumably seconds (4 hours) — TODO
	// confirm against the notebook API.
	storageTypeOBS   = "obs"
	autoStopDuration = 4 * 60 * 60

	DataSetMountPath = "/home/ma-user/work"
	NotebookEnv      = "Python3"
	NotebookType     = "Ascend"
	FlavorInfo       = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

	// Train-job settings.
	//
	// The commented-out declarations below are retained verbatim from the
	// original file (hard-coded JSON descriptions of pools, engines, engine
	// versions and flavors).
	//
	//	ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
	//	Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}"
	//	EngineVersions = "{\"version\":[{\"id\":118,\"value\":\"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64\"}," +
	//		"{\"id\":119,\"value\":\"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64\"}," +
	//		"{\"id\":120,\"value\":\"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64\"}," +
	//		"{\"id\":117,\"value\":\"TF-1.15-c75-python3.7-euleros2.8-aarch64\"}" +
	//		"]}"
	//	TrainJobFlavorInfo = "{\"flavor\":[{\"code\":\"modelarts.bm.910.arm.public.2\",\"value\":\"Ascend : 2 * Ascend 910 CPU:48 核 512GiB\"}," +
	//		"{\"code\":\"modelarts.bm.910.arm.public.8\",\"value\":\"Ascend : 8 * Ascend 910 CPU:192 核 2048GiB\"}," +
	//		"{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," +
	//		"{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" +
	//		"]}"

	CodePath   = "/code/"
	OutputPath = "/output/"
	LogPath    = "/log/"
	JobPath    = "/job/"

	// Sort orders. The original notes describe OrderDesc as querying downward
	// and OrderAsc as querying upward.
	OrderDesc = "desc"
	OrderAsc  = "asc"

	Lines            = 20
	TrainUrl         = "train_url"
	DataUrl          = "data_url"
	PerPage          = 10
	SortByCreateTime = "create_time"
	ConfigTypeCustom = "custom"
)

var (
	// poolInfos caches the parsed form of setting.PoolInfos; GenerateTask
	// fills it on first use.
	poolInfos *models.PoolInfos
	// FlavorInfos is not assigned anywhere in this file.
	FlavorInfos *models.FlavorInfos
)

// GenerateTrainJobReq carries the inputs GenerateTrainJob needs to create a
// train job and to record it.
type GenerateTrainJobReq struct {
	JobName          string
	Uuid             string
	Description      string
	CodeObsPath      string
	BootFile         string
	DataUrl          string
	TrainUrl         string
	FlavorCode       string
	LogUrl           string
	PoolID           string
	WorkServerNumber int
	EngineID         int64
	Parameters       []models.Parameter
}

// VersionInfo mirrors JSON of the form {"version":[{"id":…,"value":…}]}.
type VersionInfo struct {
	Version []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
	} `json:"version"`
}

// Flavor mirrors JSON of the form {"flavor":[{"code":…,"value":…}]}.
type Flavor struct {
	Info []struct {
		Code  string `json:"code"`
		Value string `json:"value"`
	} `json:"flavor"`
}

// Engine mirrors JSON of the form {"engine":[{"id":…,"value":…}]}.
type Engine struct {
	Info []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
	} `json:"engine"`
}

// ResourcePool mirrors JSON of the form {"resource_pool":[{"id":…,"value":…}]}.
type ResourcePool struct {
	Info []struct {
		ID    string `json:"id"`
		Value string `json:"value"`
	} `json:"resource_pool"`
}

// GenerateTask creates a notebook job named jobName and records it as a
// Cloudbrain entry for the current user and repository.
//
// When uuid is non-empty the notebook storage location is derived from
// setting.Bucket, setting.BasePath and the uuid; otherwise the user's own OBS
// directory (setting.UserBasePath + user name) is used and created first if
// it does not exist yet.
//
// It returns an error if the uuid is too short to derive a path from, if the
// OBS calls fail, if setting.PoolInfos cannot be parsed or lists no pool, or
// if creating the job or the Cloudbrain record fails.
func GenerateTask(ctx *context.Context, jobName, uuid, description string) error {
	var dataActualPath string
	if uuid != "" {
		// The path uses the first two bytes of the uuid as directory levels.
		if len(uuid) < 2 {
			log.Error("invalid dataset uuid %q", uuid)
			return errors.New("modelarts: dataset uuid is too short")
		}
		dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
	} else {
		userPath := setting.UserBasePath + ctx.User.Name + "/"
		isExist, err := storage.ObsHasObject(userPath)
		if err != nil {
			log.Error("ObsHasObject failed:%v, MsgID:%v", err.Error(), ctx.Data["MsgID"])
			return err
		}

		if !isExist {
			if err = storage.ObsCreateObject(userPath); err != nil {
				log.Error("ObsCreateObject failed:%v, MsgID:%v", err.Error(), ctx.Data["MsgID"])
				return err
			}
		}

		dataActualPath = setting.Bucket + "/" + userPath
	}

	if poolInfos == nil {
		// Decode into a local value so that a failed parse never leaves a
		// half-populated result cached in the package-level variable.
		var parsed models.PoolInfos
		if err := json.Unmarshal([]byte(setting.PoolInfos), &parsed); err != nil {
			log.Error("Unmarshal PoolInfos failed: %v", err.Error())
			return err
		}
		poolInfos = &parsed
	}
	if len(poolInfos.PoolInfo) == 0 {
		log.Error("no resource pool found in setting.PoolInfos")
		return errors.New("modelarts: no resource pool configured")
	}
	// Only the first configured pool is used for notebooks.
	pool := poolInfos.PoolInfo[0]

	jobResult, err := CreateJob(models.CreateNotebookParams{
		JobName:     jobName,
		Description: description,
		ProfileID:   setting.ProfileID,
		Flavor:      setting.Flavor,
		Pool: models.Pool{
			ID:   pool.PoolId,
			Name: pool.PoolName,
			Type: pool.PoolType,
		},
		Spec: models.Spec{
			Storage: models.Storage{
				Type: storageTypeOBS,
				Location: models.Location{
					Path: dataActualPath,
				},
			},
			AutoStop: models.AutoStop{
				Enable:   true,
				Duration: autoStopDuration,
			},
		},
	})
	if err != nil {
		log.Error("CreateJob failed: %v", err.Error())
		return err
	}

	return models.CreateCloudbrain(&models.Cloudbrain{
		Status:  string(models.JobWaiting),
		UserID:  ctx.User.ID,
		RepoID:  ctx.Repo.Repository.ID,
		JobID:   jobResult.ID,
		JobName: jobName,
		JobType: string(models.JobTypeDebug),
		Type:    models.TypeCloudBrainNotebook,
		Uuid:    uuid,
	})
}

// GenerateTrainJob creates a train job from req and records it as a
// Cloudbrain entry for the current user and repository.
//
// It returns an error if creating the job, looking up the attachment for
// req.Uuid, or writing the Cloudbrain record fails. The job is created before
// the attachment lookup and is not rolled back when a later step fails.
func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error {
	jobResult, err := createTrainJob(models.CreateTrainJobParams{
		JobName:     req.JobName,
		Description: req.Description,
		Config: models.Config{
			WorkServerNum: req.WorkServerNumber,
			AppUrl:        req.CodeObsPath,
			BootFileUrl:   req.BootFile,
			DataUrl:       req.DataUrl,
			EngineID:      req.EngineID,
			TrainUrl:      req.TrainUrl,
			LogUrl:        req.LogUrl,
			PoolID:        req.PoolID,
			CreateVersion: true,
			Flavor: models.Flavor{
				Code: req.FlavorCode,
			},
			Parameter: req.Parameters,
		},
	})
	if err != nil {
		log.Error("createTrainJob failed: %v", err.Error())
		return err
	}

	jobID := strconv.FormatInt(jobResult.JobID, 10)

	attach, err := models.GetAttachmentByUUID(req.Uuid)
	if err != nil {
		// Report the failure to the caller: without the attachment no
		// Cloudbrain record is written, so returning nil would hide that the
		// created job (jobID) is untracked.
		log.Error("GetAttachmentByUUID(%s) failed for job %s:%v", req.Uuid, jobID, err.Error())
		return err
	}

	// NOTE(review): the record is stored with models.JobTypeDebug although
	// this is a train job — confirm whether a dedicated train job type should
	// be used here.
	err = models.CreateCloudbrain(&models.Cloudbrain{
		Status:      TransTrainJobStatus(jobResult.Status),
		UserID:      ctx.User.ID,
		RepoID:      ctx.Repo.Repository.ID,
		JobID:       jobID,
		JobName:     req.JobName,
		JobType:     string(models.JobTypeDebug),
		Type:        models.TypeCloudBrainTrainJob,
		VersionID:   jobResult.VersionID,
		VersionName: jobResult.VersionName,
		Uuid:        req.Uuid,
		DatasetName: attach.Name,
	})
	if err != nil {
		log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
		return err
	}

	return nil
}

// TransTrainJobStatus converts a numeric train job status code (0-21) into
// its status name. Any other code is returned as its decimal string.
func TransTrainJobStatus(status int) string {
	switch status {
	case 0:
		return "UNKNOWN"
	case 1:
		return "INIT"
	case 2:
		return "IMAGE_CREATING"
	case 3:
		return "IMAGE_FAILED"
	case 4:
		return "SUBMIT_TRYING"
	case 5:
		return "SUBMIT_FAILED"
	case 6:
		return "DELETE_FAILED"
	case 7:
		return "WAITING"
	case 8:
		return "RUNNING"
	case 9:
		return "KILLING"
	case 10:
		return "COMPLETED"
	case 11:
		return "FAILED"
	case 12:
		return "KILLED"
	case 13:
		return "CANCELED"
	case 14:
		return "LOST"
	case 15:
		return "SCALING"
	case 16:
		return "SUBMIT_MODEL_FAILED"
	case 17:
		return "DEPLOY_SERVICE_FAILED"
	case 18:
		return "CHECK_INIT"
	case 19:
		return "CHECK_RUNNING"
	case 20:
		return "CHECK_RUNNING_COMPLETED"
	case 21:
		return "CHECK_FAILED"
	default:
		return strconv.Itoa(status)
	}
}