Browse Source

Merge branch 'multi-dataset' of https://git.openi.org.cn/OpenI/aiforge into multi-dataset

pull/2384/head
zhoupzh 3 years ago
parent
commit
7f2c568742
2 changed files with 189 additions and 59 deletions
  1. +68
    -43
      modules/cloudbrain/cloudbrain.go
  2. +121
    -16
      routers/repo/cloudbrain.go

+ 68
- 43
modules/cloudbrain/cloudbrain.go View File

@@ -44,6 +44,31 @@ var (
TrainResourceSpecs *models.ResourceSpecs TrainResourceSpecs *models.ResourceSpecs
) )


// GenerateCloudBrainTaskReq bundles every argument that GenerateTask needs to
// create a cloudbrain job and persist its Cloudbrain record. It replaces the
// long positional parameter list GenerateTask previously took.
type GenerateCloudBrainTaskReq struct {
// Ctx is the request context; GenerateTask reads the user, the repository
// and Data["MsgID"] (for log correlation) from it.
Ctx *context.Context
// DisplayJobName is stored on the Cloudbrain record and passed to the
// task-created notification.
DisplayJobName string
// JobName is the name handed to CreateJob and stored on the record.
JobName string
// Image is the image set on the created job and stored on the record.
Image string
// Command is the command set on the job's task role.
Command string
// Uuids identifies the dataset attachment(s). CloudBrainCreate validates it
// as a ';'-separated list (checkDatasetLimit).
// NOTE(review): GenerateTask passes the whole string to
// models.GetAttachmentByUUID / AttachmentRelativePath — confirm this is
// intended once more than one UUID is supplied.
Uuids string
// CodePath is the host path mounted at CodeMountPath.
CodePath string
// ModelPath is the host path mounted at ModelMountPath.
ModelPath string
// BenchmarkPath is the host path mounted read-only at BenchMarkMountPath.
BenchmarkPath string
// Snn4ImageNetPath is the host path mounted read-only at Snn4imagenetMountPath.
Snn4ImageNetPath string
// BrainScorePath is the host path mounted read-only at BrainScoreMountPath.
BrainScorePath string
// JobType selects the resource-spec table (train vs. other) and which
// notification action is emitted.
JobType string
// GpuQueue is passed to CreateJob as GpuType and stored on the record.
GpuQueue string
// Description, BranchName, BootFile, Params and CommitID are stored on the
// Cloudbrain record as provided.
Description string
BranchName string
BootFile string
Params string
CommitID string
// DataLocalPath is filled by the benchmark callers with the attachment's
// path under the Minio real path.
// NOTE(review): its use inside GenerateTask is not visible in this diff — confirm.
DataLocalPath string
// BenchmarkTypeID and BenchmarkChildTypeID are stored on the record; the
// non-benchmark caller passes 0 for both.
BenchmarkTypeID int
BenchmarkChildTypeID int
// ResourceSpecId is matched against the configured resource specs;
// GenerateTask returns an error when no spec has this Id.
ResourceSpecId int
}

func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
if !ctx.IsSigned { if !ctx.IsSigned {
return false return false
@@ -187,23 +212,23 @@ func AdminOrImageCreaterRight(ctx *context.Context) {


} }


func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description, branchName, bootFile, params, commitID string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error {
func GenerateTask(req GenerateCloudBrainTaskReq) error {


dataActualPath := setting.Attachment.Minio.RealPath + dataActualPath := setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath + setting.Attachment.Minio.BasePath +
models.AttachmentRelativePath(uuid) +
uuid
models.AttachmentRelativePath(req.Uuids) +
req.Uuids


var resourceSpec *models.ResourceSpec var resourceSpec *models.ResourceSpec
var versionCount int var versionCount int
if jobType == string(models.JobTypeTrain) {
if req.JobType == string(models.JobTypeTrain) {
versionCount = 1 versionCount = 1
if TrainResourceSpecs == nil { if TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs)
} }
for _, spec := range TrainResourceSpecs.ResourceSpec { for _, spec := range TrainResourceSpecs.ResourceSpec {
if resourceSpecId == spec.Id {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec resourceSpec = spec
} }
} }
@@ -212,7 +237,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
} }
for _, spec := range ResourceSpecs.ResourceSpec { for _, spec := range ResourceSpecs.ResourceSpec {
if resourceSpecId == spec.Id {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec resourceSpec = spec
} }
} }
@@ -220,25 +245,25 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
} }


if resourceSpec == nil { if resourceSpec == nil {
log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"])
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"])
return errors.New("no such resourceSpec") return errors.New("no such resourceSpec")
} }


var datasetName string var datasetName string
attach, err := models.GetAttachmentByUUID(uuid)
attach, err := models.GetAttachmentByUUID(req.Uuids)
if err != nil { if err != nil {
//for benchmark, do not return error //for benchmark, do not return error
log.Error("GetAttachmentByUUID failed:%v", err)
log.Error("GetAttachmentByUUID failed:%v", err, req.Ctx.Data["MsgID"])
} else { } else {
datasetName = attach.Name datasetName = attach.Name
} }


createTime := timeutil.TimeStampNow() createTime := timeutil.TimeStampNow()
jobResult, err := CreateJob(jobName, models.CreateJobParams{
JobName: jobName,
jobResult, err := CreateJob(req.JobName, models.CreateJobParams{
JobName: req.JobName,
RetryCount: 1, RetryCount: 1,
GpuType: gpuQueue,
Image: image,
GpuType: req.GpuQueue,
Image: req.Image,
TaskRoles: []models.TaskRole{ TaskRoles: []models.TaskRole{
{ {
Name: SubTaskName, Name: SubTaskName,
@@ -249,7 +274,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
GPUNumber: resourceSpec.GpuNum, GPUNumber: resourceSpec.GpuNum,
MemoryMB: resourceSpec.MemMiB, MemoryMB: resourceSpec.MemMiB,
ShmMB: resourceSpec.ShareMemMiB, ShmMB: resourceSpec.ShareMemMiB,
Command: command,
Command: req.Command,
NeedIBDevice: false, NeedIBDevice: false,
IsMainRole: false, IsMainRole: false,
UseNNI: false, UseNNI: false,
@@ -258,7 +283,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
Volumes: []models.Volume{ Volumes: []models.Volume{
{ {
HostPath: models.StHostPath{ HostPath: models.StHostPath{
Path: codePath,
Path: req.CodePath,
MountPath: CodeMountPath, MountPath: CodeMountPath,
ReadOnly: false, ReadOnly: false,
}, },
@@ -272,28 +297,28 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
}, },
{ {
HostPath: models.StHostPath{ HostPath: models.StHostPath{
Path: modelPath,
Path: req.ModelPath,
MountPath: ModelMountPath, MountPath: ModelMountPath,
ReadOnly: false, ReadOnly: false,
}, },
}, },
{ {
HostPath: models.StHostPath{ HostPath: models.StHostPath{
Path: benchmarkPath,
Path: req.BenchmarkPath,
MountPath: BenchMarkMountPath, MountPath: BenchMarkMountPath,
ReadOnly: true, ReadOnly: true,
}, },
}, },
{ {
HostPath: models.StHostPath{ HostPath: models.StHostPath{
Path: snn4imagenetPath,
Path: req.Snn4ImageNetPath,
MountPath: Snn4imagenetMountPath, MountPath: Snn4imagenetMountPath,
ReadOnly: true, ReadOnly: true,
}, },
}, },
{ {
HostPath: models.StHostPath{ HostPath: models.StHostPath{
Path: brainScorePath,
Path: req.BrainScorePath,
MountPath: BrainScoreMountPath, MountPath: BrainScoreMountPath,
ReadOnly: true, ReadOnly: true,
}, },
@@ -301,42 +326,42 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
}, },
}) })
if err != nil { if err != nil {
log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"])
log.Error("CreateJob failed:", err.Error(), req.Ctx.Data["MsgID"])
return err return err
} }
if jobResult.Code != Success { if jobResult.Code != Success {
log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg, ctx.Data["MsgID"])
log.Error("CreateJob(%s) failed:%s", req.JobName, jobResult.Msg, req.Ctx.Data["MsgID"])
return errors.New(jobResult.Msg) return errors.New(jobResult.Msg)
} }


var jobID = jobResult.Payload["jobId"].(string) var jobID = jobResult.Payload["jobId"].(string)
err = models.CreateCloudbrain(&models.Cloudbrain{ err = models.CreateCloudbrain(&models.Cloudbrain{
Status: string(models.JobWaiting), Status: string(models.JobWaiting),
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
UserID: req.Ctx.User.ID,
RepoID: req.Ctx.Repo.Repository.ID,
JobID: jobID, JobID: jobID,
JobName: jobName,
DisplayJobName: displayJobName,
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
SubTaskName: SubTaskName, SubTaskName: SubTaskName,
JobType: jobType,
JobType: req.JobType,
Type: models.TypeCloudBrainOne, Type: models.TypeCloudBrainOne,
Uuid: uuid,
Image: image,
GpuQueue: gpuQueue,
ResourceSpecId: resourceSpecId,
Uuid: req.Uuids,
Image: req.Image,
GpuQueue: req.GpuQueue,
ResourceSpecId: req.ResourceSpecId,
ComputeResource: models.GPUResource, ComputeResource: models.GPUResource,
BenchmarkTypeID: benchmarkTypeID,
BenchmarkChildTypeID: benchmarkChildTypeID,
Description: description,
BenchmarkTypeID: req.BenchmarkTypeID,
BenchmarkChildTypeID: req.BenchmarkChildTypeID,
Description: req.Description,
IsLatestVersion: "1", IsLatestVersion: "1",
VersionCount: versionCount, VersionCount: versionCount,
BranchName: branchName,
BootFile: bootFile,
BranchName: req.BranchName,
BootFile: req.BootFile,
DatasetName: datasetName, DatasetName: datasetName,
Parameters: params,
Parameters: req.Params,
CreatedUnix: createTime, CreatedUnix: createTime,
UpdatedUnix: createTime, UpdatedUnix: createTime,
CommitID: commitID,
CommitID: req.CommitID,
}) })


if err != nil { if err != nil {
@@ -345,17 +370,17 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,


task, err := models.GetCloudbrainByJobID(jobID) task, err := models.GetCloudbrainByJobID(jobID)
if err != nil { if err != nil {
log.Error("GetCloudbrainByName failed: %v", err.Error())
log.Error("GetCloudbrainByJobID failed: %v", err.Error())
return err return err
} }
stringId := strconv.FormatInt(task.ID, 10) stringId := strconv.FormatInt(task.ID, 10)


if IsBenchmarkJob(jobType) {
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateBenchMarkTask)
} else if string(models.JobTypeTrain) == jobType {
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, displayJobName, models.ActionCreateGPUTrainTask)
if IsBenchmarkJob(req.JobType) {
notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateBenchMarkTask)
} else if string(models.JobTypeTrain) == req.JobType {
notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateGPUTrainTask)
} else { } else {
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugGPUTask)
notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugGPUTask)
} }


return nil return nil


+ 121
- 16
routers/repo/cloudbrain.go View File

@@ -207,7 +207,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
displayJobName := form.DisplayJobName displayJobName := form.DisplayJobName
jobName := util.ConvertDisplayJobNameToJobName(displayJobName) jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
image := strings.TrimSpace(form.Image) image := strings.TrimSpace(form.Image)
uuid := form.Attachment
uuids := form.Attachment
jobType := form.JobType jobType := form.JobType
gpuQueue := form.GpuType gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
@@ -273,6 +273,13 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
} }
} }


if err = checkDatasetLimit(uuids); err != nil {
log.Error("checkDatasetLimit failed: %v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("checkDatasetLimit failed", tpl, &form)
return
}

if branchName == "" { if branchName == "" {
branchName = cloudbrain.DefaultBranchName branchName = cloudbrain.DefaultBranchName
} }
@@ -285,11 +292,31 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {


commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)


err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params,
commitID, 0, 0, resourceSpecId)
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
JobName: jobName,
Image: image,
Command: command,
Uuids: uuids,
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"),
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description,
BranchName: branchName,
BootFile: form.BootFile,
Params: form.Params,
CommitID: commitID,
BenchmarkTypeID: 0,
BenchmarkChildTypeID: 0,
ResourceSpecId: resourceSpecId,
}

err = cloudbrain.GenerateTask(req)
if err != nil { if err != nil {
cloudBrainNewDataPrepare(ctx) cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(err.Error(), tpl, &form) ctx.RenderWithErr(err.Error(), tpl, &form)
@@ -1982,11 +2009,38 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
//return //return
} }


err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, cloudbrain.DefaultBranchName, "", "",
"", benchmarkTypeID, benchmarkChildTypeID, resourceSpecId)
dataActualPath := setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath +
models.AttachmentRelativePath(childInfo.Attachment) +
childInfo.Attachment

req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
JobName: jobName,
Image: image,
Command: command,
Uuids: childInfo.Attachment,
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"),
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: string(models.JobTypeBenchmark),
GpuQueue: gpuQueue,
Description: form.Description,
BranchName: cloudbrain.DefaultBranchName,
BootFile: "",
Params: "",
CommitID: "",
BenchmarkTypeID: benchmarkTypeID,
BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId,
DataLocalPath: dataActualPath,
}

err = cloudbrain.GenerateTask(req)
if err != nil { if err != nil {
cloudBrainNewDataPrepare(ctx) cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, &form) ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, &form)
@@ -2080,11 +2134,38 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
command = fmt.Sprintf(cloudbrain.BrainScoreCommand, getBrainRegion(benchmarkChildTypeID), displayJobName, trimSpaceNewlineInString(form.Description)) command = fmt.Sprintf(cloudbrain.BrainScoreCommand, getBrainRegion(benchmarkChildTypeID), displayJobName, trimSpaceNewlineInString(form.Description))
} }


err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params,
"", 0, benchmarkChildTypeID, resourceSpecId)
dataActualPath := setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath +
models.AttachmentRelativePath(uuid) +
uuid

req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
JobName: jobName,
Image: image,
Command: command,
Uuids: uuid,
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"),
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description,
BranchName: branchName,
BootFile: form.BootFile,
Params: form.Params,
CommitID: "",
BenchmarkTypeID: 0,
BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId,
DataLocalPath: dataActualPath,
}

err = cloudbrain.GenerateTask(req)
if err != nil { if err != nil {
cloudBrainNewDataPrepare(ctx) cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(err.Error(), tpl, &form) ctx.RenderWithErr(err.Error(), tpl, &form)
@@ -2195,3 +2276,27 @@ func GetBenchmarkTypes(ctx *context.Context) *models.BenchmarkTypes {
} }
return benchmarkTypesMap[lang] return benchmarkTypesMap[lang]
} }

// checkDatasetLimit validates a ';'-separated list of dataset attachment UUIDs.
//
// It returns an error when the list holds more than 5 entries, when any entry
// cannot be resolved through models.GetAttachmentByUUID, or when two entries
// resolve to attachments sharing the same Name. It returns nil otherwise.
//
// Note that strings.Split yields one empty element for an empty uuidStr, so an
// empty input results in a single lookup with an empty UUID.
func checkDatasetLimit(uuidStr string) error {
	ids := strings.Split(uuidStr, ";")
	if count := len(ids); count > 5 {
		log.Error("the dataset count(%d) exceed the limit", count)
		return errors.New("the dataset count exceed the limit")
	}

	// Names seen so far; used only for membership checks.
	seen := make(map[string]struct{})
	for _, id := range ids {
		attachment, err := models.GetAttachmentByUUID(id)
		if err != nil {
			log.Error("GetAttachmentByUUID failed: %v", err)
			return err
		}

		name := attachment.Name
		if _, duplicated := seen[name]; duplicated {
			log.Error("the dataset name is same: %v", name)
			return errors.New("the dataset name is same")
		}
		seen[name] = struct{}{}
	}
	return nil
}

Loading…
Cancel
Save