|
- package grampus
-
- import (
- "code.gitea.io/gitea/modules/timeutil"
- "strconv"
-
- "code.gitea.io/gitea/models"
- "code.gitea.io/gitea/modules/context"
- "code.gitea.io/gitea/modules/log"
- "code.gitea.io/gitea/modules/notification"
- )
-
const (
	// Notebook settings.
	storageTypeOBS = "obs"

	// autoStopDuration is 4 * 60 * 60; autoStopDurationMs is the same span
	// multiplied by 1000 (the names suggest seconds and milliseconds).
	autoStopDuration   = 4 * 60 * 60
	autoStopDurationMs = autoStopDuration * 1000

	DataSetMountPath = "/home/ma-user/work"
	NotebookEnv      = "Python3"
	NotebookType     = "Ascend"
	FlavorInfo       = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

	// Train-job settings.
	//
	// Commented-out JSON catalogue constants (not compiled):
	// ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
	// Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}"
	// EngineVersions = "{\"version\":[{\"id\":118,\"value\":\"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64\"}," +
	// "{\"id\":119,\"value\":\"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64\"}," +
	// "{\"id\":120,\"value\":\"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64\"}," +
	// "{\"id\":117,\"value\":\"TF-1.15-c75-python3.7-euleros2.8-aarch64\"}" +
	// "]}"
	// TrainJobFlavorInfo = "{\"flavor\":[{\"code\":\"modelarts.bm.910.arm.public.2\",\"value\":\"Ascend : 2 * Ascend 910 CPU:48 核 512GiB\"}," +
	// "{\"code\":\"modelarts.bm.910.arm.public.8\",\"value\":\"Ascend : 8 * Ascend 910 CPU:192 核 2048GiB\"}," +
	// "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," +
	// "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" +
	// "]}"

	// Path segments.
	CodePath   = "/code/"
	OutputPath = "/output/"
	ResultPath = "/result/"
	LogPath    = "/log/"
	JobPath    = "/job/"

	// Query ordering and line count.
	OrderDesc = "desc" // query downward
	OrderAsc  = "asc"  // query upward
	Lines     = 500

	// Parameter names and values.
	TrainUrl     = "train_url"
	DataUrl      = "data_url"
	ResultUrl    = "result_url"
	CkptUrl      = "ckpt_url"
	DeviceTarget = "device_target"
	Ascend       = "Ascend"

	// Paging and version bookkeeping.
	PerPage          = 10
	IsLatestVersion  = "1"
	NotLatestVersion = "0"
	VersionCount     = 1

	SortByCreateTime  = "create_time"
	ConfigTypeCustom  = "custom"
	TotalVersionCount = 1
)
-
- var (
- poolInfos *models.PoolInfos
- FlavorInfos *models.FlavorInfos
- ImageInfos *models.ImageInfosModelArts
- )
-
- type GenerateTrainJobReq struct {
- JobName string
- DisplayJobName string
- Uuid string
- Description string
- CodeObsPath string
- BootFile string
- BootFileUrl string
- DataUrl string
- TrainUrl string
- FlavorCode string
- LogUrl string
- PoolID string
- WorkServerNumber int
- EngineID int64
- Parameters []models.Parameter
- CommitID string
- IsLatestVersion string
- Params string
- BranchName string
- PreVersionId int64
- PreVersionName string
- FlavorName string
- VersionCount int
- EngineName string
- TotalVersionCount int
- }
-
- type GenerateInferenceJobReq struct {
- JobName string
- DisplayJobName string
- Uuid string
- Description string
- CodeObsPath string
- BootFile string
- BootFileUrl string
- DataUrl string
- TrainUrl string
- FlavorCode string
- LogUrl string
- PoolID string
- WorkServerNumber int
- EngineID int64
- Parameters []models.Parameter
- CommitID string
- Params string
- BranchName string
- FlavorName string
- EngineName string
- LabelName string
- IsLatestVersion string
- VersionCount int
- TotalVersionCount int
- ModelName string
- ModelVersion string
- CkptName string
- ResultUrl string
- }
-
// VersionInfo is the JSON document shape
// {"version":[{"id":<int>,"value":<string>}, ...]}.
type VersionInfo struct {
	// Version holds one id/value pair per listed version.
	Version []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
	} `json:"version"`
}
-
// Flavor is the JSON document shape
// {"flavor":[{"code":<string>,"value":<string>}, ...]}.
type Flavor struct {
	// Info holds one code/value pair per listed flavor.
	Info []struct {
		Code  string `json:"code"`
		Value string `json:"value"`
	} `json:"flavor"`
}
-
// Engine is the JSON document shape
// {"engine":[{"id":<int>,"value":<string>}, ...]}.
type Engine struct {
	// Info holds one id/value pair per listed engine.
	Info []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
	} `json:"engine"`
}
-
// ResourcePool is the JSON document shape
// {"resource_pool":[{"id":<string>,"value":<string>}, ...]}.
type ResourcePool struct {
	// Info holds one id/value pair per listed resource pool.
	Info []struct {
		ID    string `json:"id"`
		Value string `json:"value"`
	} `json:"resource_pool"`
}
-
- // type Parameter struct {
- // Label string `json:"label"`
- // Value string `json:"value"`
- // }
-
- // type Parameters struct {
- // Parameter []Parameter `json:"parameter"`
- // }
-
// Parameters is the JSON document shape
// {"parameter":[{"label":<string>,"value":<string>}, ...]}.
type Parameters struct {
	// Parameter holds one label/value pair per entry.
	Parameter []struct {
		Label string `json:"label"`
		Value string `json:"value"`
	} `json:"parameter"`
}
-
- func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
- createTime := timeutil.TimeStampNow()
- jobResult, err := createTrainJob(models.CreateTrainJobParams{
- JobName: req.JobName,
- Description: req.Description,
- Config: models.Config{
- WorkServerNum: req.WorkServerNumber,
- AppUrl: req.CodeObsPath,
- BootFileUrl: req.BootFileUrl,
- DataUrl: req.DataUrl,
- EngineID: req.EngineID,
- TrainUrl: req.TrainUrl,
- LogUrl: req.LogUrl,
- PoolID: req.PoolID,
- CreateVersion: true,
- Flavor: models.Flavor{
- Code: req.FlavorCode,
- },
- Parameter: req.Parameters,
- },
- })
- if err != nil {
- log.Error("CreateJob failed: %v", err.Error())
- return err
- }
-
- attach, err := models.GetAttachmentByUUID(req.Uuid)
- if err != nil {
- log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
- return err
- }
- jobId := strconv.FormatInt(jobResult.JobID, 10)
- err = models.CreateCloudbrain(&models.Cloudbrain{
- Status: TransTrainJobStatus(jobResult.Status),
- UserID: ctx.User.ID,
- RepoID: ctx.Repo.Repository.ID,
- JobID: jobId,
- JobName: req.JobName,
- DisplayJobName: req.DisplayJobName,
- JobType: string(models.JobTypeTrain),
- Type: models.TypeCloudBrainTwo,
- VersionID: jobResult.VersionID,
- VersionName: jobResult.VersionName,
- Uuid: req.Uuid,
- DatasetName: attach.Name,
- CommitID: req.CommitID,
- IsLatestVersion: req.IsLatestVersion,
- ComputeResource: models.NPUResource,
- EngineID: req.EngineID,
- TrainUrl: req.TrainUrl,
- BranchName: req.BranchName,
- Parameters: req.Params,
- BootFile: req.BootFile,
- DataUrl: req.DataUrl,
- LogUrl: req.LogUrl,
- FlavorCode: req.FlavorCode,
- Description: req.Description,
- WorkServerNumber: req.WorkServerNumber,
- FlavorName: req.FlavorName,
- EngineName: req.EngineName,
- VersionCount: req.VersionCount,
- TotalVersionCount: req.TotalVersionCount,
- CreatedUnix: createTime,
- UpdatedUnix: createTime,
- })
-
- if err != nil {
- log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
- return err
- }
- notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask)
- return nil
- }
-
// trainJobStatusNames lists the status names known to TransTrainJobStatus,
// indexed by numeric status code.
var trainJobStatusNames = [...]string{
	0:  "UNKNOWN",
	1:  "INIT",
	2:  "IMAGE_CREATING",
	3:  "IMAGE_FAILED",
	4:  "SUBMIT_TRYING",
	5:  "SUBMIT_FAILED",
	6:  "DELETE_FAILED",
	7:  "WAITING",
	8:  "RUNNING",
	9:  "KILLING",
	10: "COMPLETED",
	11: "FAILED",
	12: "KILLED",
	13: "CANCELED",
	14: "LOST",
	15: "SCALING",
	16: "SUBMIT_MODEL_FAILED",
	17: "DEPLOY_SERVICE_FAILED",
	18: "CHECK_INIT",
	19: "CHECK_RUNNING",
	20: "CHECK_RUNNING_COMPLETED",
	21: "CHECK_FAILED",
}

// TransTrainJobStatus converts a numeric train-job status code into its
// name. Codes outside 0..21 (including negative ones) are returned as their
// decimal string representation.
func TransTrainJobStatus(status int) string {
	if status >= 0 && status < len(trainJobStatusNames) {
		return trainJobStatusNames[status]
	}
	return strconv.Itoa(status)
}
|