diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 04c4dbac3..a061857c4 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -69,8 +69,8 @@ type Cloudbrain struct { CanDel bool `xorm:"-"` Type int `xorm:"INDEX DEFAULT 0"` - VersionID int64 `xorm:"INDEX DEFAULT 0"` - VersionName string + VersionID int64 `xorm:"INDEX DEFAULT 0"` + VersionName string `xorm:"INDEX"` Uuid string DatasetName string VersionCount int64 `xorm:"INDEX DEFAULT 1"` @@ -80,6 +80,18 @@ type Cloudbrain struct { ComputeResource string EngineID int64 + TrainUrl string + BranchName string + Parameters string + BootFile string + DataUrl string + LogUrl string + PreVersionId int64 + FlavorCode string + Description string + WorkServerNumber int + FlavorName string + User *User `xorm:"-"` Repo *Repository `xorm:"-"` } diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index a53661b74..3cd8ac637 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -40,6 +40,7 @@ type CreateModelArtsTrainJobForm struct { PrameterDescription string `form:"parameter_description"` BranchName string `form:"branch_name" binding:"Required"` VersionName string `form:"version_name" binding:"Required"` + FlavorName string `form:"flavor_name" binding:"Required"` } func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 9f7b67c06..88378ab10 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -35,19 +35,20 @@ const ( // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + // "]}" - CodePath = "/code/" - OutputPath = "/output/" - LogPath = "/log/" - JobPath = "/job/" - OrderDesc = "desc" //向下查询 - OrderAsc = "asc" //向上查询 - Lines = 20 - TrainUrl = "train_url" - DataUrl = "data_url" - PerPage = 10 - IsLatestVersion = "1" - NotLatestVersion = "0" - ComputeResource = "NPU" + CodePath = "/code/" + OutputPath = "/output/" + LogPath = "/log/" + JobPath = "/job/" + OrderDesc = "desc" //向下查询 + OrderAsc = "asc" //向上查询 + Lines = 20 + TrainUrl = "train_url" + DataUrl = "data_url" + PerPage = 10 + IsLatestVersion = "1" + NotLatestVersion = "0" + ComputeResource = "NPU" + InitFatherVersionName = "V0001" SortByCreateTime = "create_time" ConfigTypeCustom = "custom" @@ -59,21 +60,25 @@ var ( ) type GenerateTrainJobReq struct { - JobName string - Uuid string - Description string - CodeObsPath string - BootFile string - DataUrl string - TrainUrl string - FlavorCode string - LogUrl string - PoolID string - WorkServerNumber int - EngineID int64 - Parameters []models.Parameter - CommitID string - IsLatestVersion string + JobName string + Uuid string + Description string + CodeObsPath string + BootFile string + DataUrl string + TrainUrl string + FlavorCode string + LogUrl string + PoolID string + WorkServerNumber int + EngineID int64 + Parameters []models.Parameter + CommitID string + IsLatestVersion string + Params string + BranchName string + FatherVersionName string + FlavorName string } type GenerateTrainJobVersionReq struct { @@ -90,8 +95,11 @@ type GenerateTrainJobVersionReq struct { WorkServerNumber int EngineID int64 Parameters []models.Parameter + Params string PreVersionId int64 CommitID string + BranchName string + FlavorName string } type VersionInfo struct { @@ -193,7 +201,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description string) error return nil } -func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobresult *models.CreateTrainJobResult, err error) { +func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { jobResult, err := createTrainJob(models.CreateTrainJobParams{ JobName: req.JobName, Description: req.Description, @@ -215,42 +223,53 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobresult }) if err != nil { log.Error("CreateJob failed: %v", err.Error()) - return nil, err + return err } attach, err := models.GetAttachmentByUUID(req.Uuid) if err != nil { log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) - return nil, err + return err } err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: strconv.FormatInt(jobResult.JobID, 10), - JobName: req.JobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeCloudBrainTwo, - VersionID: jobResult.VersionID, - VersionName: jobResult.VersionName, - Uuid: req.Uuid, - DatasetName: attach.Name, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: ComputeResource, - EngineID: req.EngineID, + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: strconv.FormatInt(jobResult.JobID, 10), + JobName: req.JobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: attach.Name, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: ComputeResource, + EngineID: req.EngineID, + FatherVersionName: req.FatherVersionName, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + LogUrl: req.LogUrl, + FlavorCode: req.FlavorCode, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + FlavorName: req.FlavorName, }) if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) - return nil, err + return err } - return jobResult, nil + return nil } -func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string, fatherVersionName string) (jobresult *models.CreateTrainJobResult, err error) { +func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string, fatherVersionName string) (err error) { jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{ Description: req.Description, Config: models.TrainJobVersionConfig{ @@ -271,13 +290,13 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR }, jobId) if err != nil { log.Error("CreateJob failed: %v", err.Error()) - return nil, err + return err } attach, err := models.GetAttachmentByUUID(req.Uuid) if err != nil { log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) - return nil, err + return err } err = models.CreateCloudbrain(&models.Cloudbrain{ @@ -296,10 +315,21 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR FatherVersionName: fatherVersionName, ComputeResource: ComputeResource, EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + LogUrl: req.LogUrl, + PreVersionId: req.PreVersionId, + FlavorCode: req.FlavorCode, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + FlavorName: req.FlavorName, }) if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) - return nil, err + return err } repo := ctx.Repo.Repository @@ -319,38 +349,29 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR }) if err != nil { ctx.ServerError("Cloudbrain", err) - return nil, err + return err } //将训练任务的上一版本的isLatestVersion设置为"0" latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(strconv.FormatInt(jobResult.JobID, 10), IsLatestVersion) if err != nil { ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) - return nil, err + return err } err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) - return nil, err + return err } - // lastVersionNum := jobResult.VersionName[1:] - // lastVersionNumToInt64, err := strconv.ParseInt(lastVersionNum, 10, 64) - // if err != nil { - // ctx.ServerError("lastVersionNumToInt64 faild:", err) - // return nil - // } - // lastVersionName := "V" + strconv.FormatInt(lastVersionNumToInt64-1, 10) - //将训练任务的本版本的isLatestVersion设置为"0" - - //将当前版本的isLatestVersion和任务数量更新 + //将当前版本的isLatestVersion设置为"1"和任务数量更新 err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) - return nil, err + return err } - return jobResult, err + return err } func TransTrainJobStatus(status int) string { diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 63cddb32a..25290bf75 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -620,12 +620,17 @@ func TrainJobNewVersion(ctx *context.Context) { func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.Data["PageIsCloudBrain"] = true var jobID = ctx.Params(":jobid") - var versionName = ctx.Query("versionName") - jobID = "19373" + var versionName = ctx.Query("version_name") + + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) + return err + } t := time.Now() var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] - ctx.Data["job_name"] = jobName + ctx.Data["job_name"] = task.JobName attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) if err != nil { @@ -670,10 +675,14 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.ServerError("GetBranches error:", err) return err } - ctx.Data["Branches"] = Branches - ctx.Data["BranchesCount"] = len(Branches) - ctx.Data["jobID"] = jobID - ctx.Data["versionName"] = versionName + ctx.Data["branches"] = Branches + ctx.Data["branch_name"] = task.BranchName + ctx.Data["description"] = task.Description + ctx.Data["boot_file"] = task.BootFile + ctx.Data["dataset_name"] = task.DatasetName + ctx.Data["params"] = task.Parameters + ctx.Data["work_server_number"] = task.WorkServerNumber + ctx.Data["flavor_name"] = task.FlavorName configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) if err != nil { @@ -705,6 +714,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName isLatestVersion := modelarts.IsLatestVersion + FlavorName := form.FlavorName if err := paramCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) @@ -833,24 +843,28 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } req := &modelarts.GenerateTrainJobReq{ - JobName: jobName, - DataUrl: dataPath, - Description: description, - CodeObsPath: codeObsPath, - BootFile: codeObsPath + bootFile, - TrainUrl: outputObsPath, - FlavorCode: flavorCode, - WorkServerNumber: workServerNumber, - EngineID: int64(engineID), - LogUrl: logObsPath, - PoolID: poolID, - Uuid: uuid, - Parameters: parameters.Parameter, - CommitID: commitID, - IsLatestVersion: isLatestVersion, - } - - jobResult, err := modelarts.GenerateTrainJob(ctx, req) + JobName: jobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFile: codeObsPath + bootFile, + TrainUrl: outputObsPath, + FlavorCode: flavorCode, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Uuid: uuid, + Parameters: parameters.Parameter, + CommitID: commitID, + IsLatestVersion: isLatestVersion, + BranchName: branch_name, + Params: form.Params, + FatherVersionName: modelarts.InitFatherVersionName, + FlavorName: FlavorName, + } + + err = modelarts.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) trainJobNewDataPrepare(ctx) @@ -862,34 +876,34 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return } - // 保存openi创建训练任务界面的参数 - err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ - - JobName: req.JobName, - JobID: strconv.FormatInt(jobResult.JobID, 10), - VersionName: jobResult.VersionName, - ResourcePools: form.PoolID, - EngineVersions: form.EngineID, - FlavorInfos: form.Flavor, - TrainUrl: outputObsPath, - BootFile: form.BootFile, - Uuid: form.Attachment, - DatasetName: attach.Name, - Params: form.Params, - BranchName: branch_name, - }) + // // 保存openi创建训练任务界面的参数 + // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ - if err != nil { - log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) - trainJobNewVersionDataPrepare(ctx) - ctx.Data["bootFile"] = form.BootFile - ctx.Data["uuid"] = form.Attachment - ctx.Data["datasetName"] = attach.Name - ctx.Data["params"] = form.Params - ctx.Data["branch_name"] = branch_name - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) - return - } + // JobName: req.JobName, + // JobID: strconv.FormatInt(jobResult.JobID, 10), + // VersionName: jobResult.VersionName, + // ResourcePools: form.PoolID, + // EngineVersions: form.EngineID, + // FlavorInfos: form.Flavor, + // TrainUrl: outputObsPath, + // BootFile: form.BootFile, + // Uuid: form.Attachment, + // DatasetName: attach.Name, + // Params: form.Params, + // BranchName: branch_name, + // }) + + // if err != nil { + // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) + // trainJobNewVersionDataPrepare(ctx) + // ctx.Data["bootFile"] = form.BootFile + // ctx.Data["uuid"] = form.Attachment + // ctx.Data["datasetName"] = attach.Name + // ctx.Data["params"] = form.Params + // ctx.Data["branch_name"] = branch_name + // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) + // return + // } ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } @@ -918,6 +932,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName fatherVersionName := form.VersionName + FlavorName := form.FlavorName if err := paramCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) @@ -1063,11 +1078,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ LogUrl: logObsPath, PoolID: poolID, Uuid: uuid, - Parameters: parameters.Parameter, + Params: form.Params, PreVersionId: task.VersionID, CommitID: commitID, + BranchName: branch_name, + FlavorName: FlavorName, } - jobResult, err := modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName) + err = modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) trainJobNewVersionDataPrepare(ctx) @@ -1079,33 +1096,33 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ return } // 保存openi创建训练任务界面的参数 - err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ - - JobName: req.JobName, - JobID: strconv.FormatInt(jobResult.JobID, 10), - VersionName: jobResult.VersionName, - ResourcePools: form.PoolID, - EngineVersions: form.EngineID, - FlavorInfos: form.Flavor, - TrainUrl: outputObsPath, - BootFile: form.BootFile, - Uuid: form.Attachment, - DatasetName: attach.Name, - Params: form.Params, - BranchName: branch_name, - }) + // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ + + // JobName: req.JobName, + // JobID: strconv.FormatInt(jobResult.JobID, 10), + // VersionName: jobResult.VersionName, + // ResourcePools: form.PoolID, + // EngineVersions: form.EngineID, + // FlavorInfos: form.Flavor, + // TrainUrl: outputObsPath, + // BootFile: form.BootFile, + // Uuid: form.Attachment, + // DatasetName: attach.Name, + // Params: form.Params, + // BranchName: branch_name, + // }) - if err != nil { - log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) - trainJobNewVersionDataPrepare(ctx) - ctx.Data["bootFile"] = form.BootFile - ctx.Data["uuid"] = form.Attachment - ctx.Data["datasetName"] = attach.Name - ctx.Data["params"] = form.Params - ctx.Data["branch_name"] = branch_name - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) - return - } + // if err != nil { + // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) + // trainJobNewVersionDataPrepare(ctx) + // ctx.Data["bootFile"] = form.BootFile + // ctx.Data["uuid"] = form.Attachment + // ctx.Data["datasetName"] = attach.Name + // ctx.Data["params"] = form.Params + // ctx.Data["branch_name"] = branch_name + // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) + // return + // } // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) } @@ -1387,18 +1404,18 @@ func TrainJobStop(ctx *context.Context) { func TrainJobVersionDel(ctx *context.Context) { var jobID = ctx.Params(":jobid") - var versionName = ctx.Params(":versionName") + var versionName = ctx.Query(":versionName") task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) if err != nil { log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) return } _, err = modelarts.DelTrainJob(jobID) if err != nil { log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) return } @@ -1408,12 +1425,13 @@ func TrainJobVersionDel(ctx *context.Context) { return } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) } func TrainJobVersionStop(ctx *context.Context) { var jobID = ctx.Params(":jobid") - var versionName = ctx.Params(":versionName") + var versionName = ctx.Query(":versionName") task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) if err != nil { log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) @@ -1428,7 +1446,8 @@ func TrainJobVersionStop(ctx *context.Context) { return } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) } func canUserCreateTrainJob(uid int64) (bool, error) {