// Package grampus holds the constants, request types and helpers this file
// uses to submit a train job and record it as a Cloudbrain task.
package grampus

import (
	"strconv"

	"code.gitea.io/gitea/models"
	"code.gitea.io/gitea/modules/context"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/notification"
	"code.gitea.io/gitea/modules/timeutil"
)

const (
	// notebook
	storageTypeOBS = "obs"
	// 4 * 60 * 60 = 14400; presumably four hours in seconds — TODO confirm the unit with the consumer.
	autoStopDuration = 4 * 60 * 60
	// Same span multiplied by 1000; presumably milliseconds — TODO confirm.
	autoStopDurationMs = 4 * 60 * 60 * 1000

	DataSetMountPath = "/home/ma-user/work"
	NotebookEnv      = "Python3"
	NotebookType     = "Ascend"
	FlavorInfo       = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

	// train-job
	// The declarations below were already commented out in the original file
	// and are preserved verbatim.
	//	ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
	//	Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}"
	//	EngineVersions = "{\"version\":[{\"id\":118,\"value\":\"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64\"}," +
	//	"{\"id\":119,\"value\":\"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64\"}," +
	//	"{\"id\":120,\"value\":\"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64\"}," +
	//	"{\"id\":117,\"value\":\"TF-1.15-c75-python3.7-euleros2.8-aarch64\"}" +
	//	"]}"
	//	TrainJobFlavorInfo = "{\"flavor\":[{\"code\":\"modelarts.bm.910.arm.public.2\",\"value\":\"Ascend : 2 * Ascend 910 CPU:48 核 512GiB\"}," +
	//	"{\"code\":\"modelarts.bm.910.arm.public.8\",\"value\":\"Ascend : 8 * Ascend 910 CPU:192 核 2048GiB\"}," +
	//	"{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," +
	//	"{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" +
	//	"]}"

	// Path fragments.
	CodePath   = "/code/"
	OutputPath = "/output/"
	ResultPath = "/result/"
	LogPath    = "/log/"
	JobPath    = "/job/"

	OrderDesc = "desc" // query downward
	OrderAsc  = "asc"  // query upward
	Lines     = 500

	// Parameter labels.
	TrainUrl     = "train_url"
	DataUrl      = "data_url"
	ResultUrl    = "result_url"
	CkptUrl      = "ckpt_url"
	DeviceTarget = "device_target"
	Ascend       = "Ascend"

	PerPage           = 10
	IsLatestVersion   = "1"
	NotLatestVersion  = "0"
	VersionCount      = 1
	SortByCreateTime  = "create_time"
	ConfigTypeCustom  = "custom"
	TotalVersionCount = 1
)

// Package-level holders for pool, flavor and image information. Nothing in
// this file assigns them; presumably they are populated elsewhere in the
// package — verify before relying on them being non-nil.
var (
	poolInfos   *models.PoolInfos
	FlavorInfos *models.FlavorInfos
	ImageInfos  *models.ImageInfosModelArts
)

// GenerateTrainJobReq carries the values GenerateTrainJob needs to submit a
// train job and to fill in the Cloudbrain record that tracks it.
type GenerateTrainJobReq struct {
	JobName           string
	DisplayJobName    string
	Uuid              string
	Description       string
	CodeObsPath       string
	BootFile          string
	BootFileUrl       string
	DataUrl           string
	TrainUrl          string
	FlavorCode        string
	LogUrl            string
	PoolID            string
	WorkServerNumber  int
	EngineID          int64
	Parameters        []models.Parameter
	CommitID          string
	IsLatestVersion   string
	Params            string
	BranchName        string
	PreVersionId      int64
	PreVersionName    string
	FlavorName        string
	VersionCount      int
	EngineName        string
	TotalVersionCount int
}

// GenerateInferenceJobReq is the inference-job counterpart of
// GenerateTrainJobReq; it additionally names the model, checkpoint and result
// location. No function in this file consumes it.
type GenerateInferenceJobReq struct {
	JobName           string
	DisplayJobName    string
	Uuid              string
	Description       string
	CodeObsPath       string
	BootFile          string
	BootFileUrl       string
	DataUrl           string
	TrainUrl          string
	FlavorCode        string
	LogUrl            string
	PoolID            string
	WorkServerNumber  int
	EngineID          int64
	Parameters        []models.Parameter
	CommitID          string
	Params            string
	BranchName        string
	FlavorName        string
	EngineName        string
	LabelName         string
	IsLatestVersion   string
	VersionCount      int
	TotalVersionCount int
	ModelName         string
	ModelVersion      string
	CkptName          string
	ResultUrl         string
}

// VersionInfo is the JSON shape {"version":[{"id":...,"value":...}]}.
type VersionInfo struct {
	Version []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
	} `json:"version"`
}

// Flavor is the JSON shape {"flavor":[{"code":...,"value":...}]}.
type Flavor struct {
	Info []struct {
		Code  string `json:"code"`
		Value string `json:"value"`
	} `json:"flavor"`
}

// Engine is the JSON shape {"engine":[{"id":...,"value":...}]}.
type Engine struct {
	Info []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
	} `json:"engine"`
}

// ResourcePool is the JSON shape {"resource_pool":[{"id":...,"value":...}]}.
type ResourcePool struct {
	Info []struct {
		ID    string `json:"id"`
		Value string `json:"value"`
	} `json:"resource_pool"`
}

// type Parameter struct {
// 	Label string `json:"label"`
// 	Value string `json:"value"`
// }

// type Parameters struct {
// 	Parameter []Parameter `json:"parameter"`
// }

// Parameters is the JSON shape {"parameter":[{"label":...,"value":...}]}.
type Parameters struct {
	Parameter []struct {
		Label string `json:"label"`
		Value string `json:"value"`
	} `json:"parameter"`
}

// GenerateTrainJob submits a train job described by req and records it.
//
// Steps, in order:
//  1. look up the dataset attachment identified by req.Uuid;
//  2. submit the job through createTrainJob;
//  3. store a models.Cloudbrain row describing the job;
//  4. call notification.NotifyOtherTask with models.ActionCreateTrainTask.
//
// The first failing step is logged and its error returned; later steps are
// skipped. ctx.User and ctx.Repo.Repository are dereferenced and must be set.
func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
	// Resolve the attachment before submitting anything: if the UUID is bad we
	// must not leave behind a submitted job that has no Cloudbrain record.
	attach, err := models.GetAttachmentByUUID(req.Uuid)
	if err != nil {
		// Log the UUID that was actually looked up (not the job ID).
		log.Error("GetAttachmentByUUID(%s) failed:%v", req.Uuid, err.Error())
		return err
	}

	createTime := timeutil.TimeStampNow()
	jobResult, err := createTrainJob(models.CreateTrainJobParams{
		JobName:     req.JobName,
		Description: req.Description,
		Config: models.Config{
			WorkServerNum: req.WorkServerNumber,
			AppUrl:        req.CodeObsPath,
			BootFileUrl:   req.BootFileUrl,
			DataUrl:       req.DataUrl,
			EngineID:      req.EngineID,
			TrainUrl:      req.TrainUrl,
			LogUrl:        req.LogUrl,
			PoolID:        req.PoolID,
			CreateVersion: true,
			Flavor: models.Flavor{
				Code: req.FlavorCode,
			},
			Parameter: req.Parameters,
		},
	})
	if err != nil {
		log.Error("CreateJob failed: %v", err.Error())
		return err
	}

	// The Cloudbrain record and the notification both take the job ID as a string.
	jobID := strconv.FormatInt(jobResult.JobID, 10)
	err = models.CreateCloudbrain(&models.Cloudbrain{
		Status:            TransTrainJobStatus(jobResult.Status),
		UserID:            ctx.User.ID,
		RepoID:            ctx.Repo.Repository.ID,
		JobID:             jobID,
		JobName:           req.JobName,
		DisplayJobName:    req.DisplayJobName,
		JobType:           string(models.JobTypeTrain),
		Type:              models.TypeCloudBrainTwo,
		VersionID:         jobResult.VersionID,
		VersionName:       jobResult.VersionName,
		Uuid:              req.Uuid,
		DatasetName:       attach.Name,
		CommitID:          req.CommitID,
		IsLatestVersion:   req.IsLatestVersion,
		ComputeResource:   models.NPUResource,
		EngineID:          req.EngineID,
		TrainUrl:          req.TrainUrl,
		BranchName:        req.BranchName,
		Parameters:        req.Params,
		BootFile:          req.BootFile,
		DataUrl:           req.DataUrl,
		LogUrl:            req.LogUrl,
		FlavorCode:        req.FlavorCode,
		Description:       req.Description,
		WorkServerNumber:  req.WorkServerNumber,
		FlavorName:        req.FlavorName,
		EngineName:        req.EngineName,
		VersionCount:      req.VersionCount,
		TotalVersionCount: req.TotalVersionCount,
		CreatedUnix:       createTime,
		UpdatedUnix:       createTime,
	})
	if err != nil {
		log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
		return err
	}

	notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateTrainTask)
	return nil
}

// trainJobStatusNames maps a numeric train-job status (the slice index) to the
// name TransTrainJobStatus returns for it.
var trainJobStatusNames = []string{
	"UNKNOWN",                 // 0
	"INIT",                    // 1
	"IMAGE_CREATING",          // 2
	"IMAGE_FAILED",            // 3
	"SUBMIT_TRYING",           // 4
	"SUBMIT_FAILED",           // 5
	"DELETE_FAILED",           // 6
	"WAITING",                 // 7
	"RUNNING",                 // 8
	"KILLING",                 // 9
	"COMPLETED",               // 10
	"FAILED",                  // 11
	"KILLED",                  // 12
	"CANCELED",                // 13
	"LOST",                    // 14
	"SCALING",                 // 15
	"SUBMIT_MODEL_FAILED",     // 16
	"DEPLOY_SERVICE_FAILED",   // 17
	"CHECK_INIT",              // 18
	"CHECK_RUNNING",           // 19
	"CHECK_RUNNING_COMPLETED", // 20
	"CHECK_FAILED",            // 21
}

// TransTrainJobStatus converts a numeric train-job status into its name.
// Codes 0 through 21 map to the names in trainJobStatusNames; any other value
// (including negatives) is returned as its decimal string.
func TransTrainJobStatus(status int) string {
	if status >= 0 && status < len(trainJobStatusNames) {
		return trainJobStatusNames[status]
	}
	return strconv.Itoa(status)
}