@@ -44,6 +44,31 @@ var ( | |||
TrainResourceSpecs *models.ResourceSpecs | |||
) | |||
// GenerateCloudBrainTaskReq bundles the inputs of GenerateTask into a single
// request value so callers can populate it with named fields.
type GenerateCloudBrainTaskReq struct {
	// Ctx is the web request context. GenerateTask reads the acting user,
	// the repository and the "MsgID" data entry from it.
	Ctx *context.Context

	// DisplayJobName is the job name stored on the task record and used in
	// the notification sent after creation.
	DisplayJobName string
	// JobName is the name handed to CreateJob and stored on the task record.
	JobName string
	// Image is the image the job is created with.
	Image string
	// Command is the command set on the job's task role.
	Command string
	// Uuids identifies the dataset attachment(s). CloudBrainCreate validates
	// it as a ";"-separated list, while GenerateTask looks it up as one UUID.
	Uuids string

	// CodePath is the host path mounted at CodeMountPath.
	CodePath string
	// ModelPath is the host path mounted at ModelMountPath.
	ModelPath string
	// BenchmarkPath is the host path mounted read-only at BenchMarkMountPath.
	BenchmarkPath string
	// Snn4ImageNetPath is the host path mounted read-only at
	// Snn4imagenetMountPath.
	Snn4ImageNetPath string
	// BrainScorePath is the host path mounted read-only at
	// BrainScoreMountPath.
	BrainScorePath string

	// JobType selects which resource spec table is consulted and which
	// notification action is emitted.
	JobType string
	// GpuQueue is passed to CreateJob as the GPU type and stored on the
	// task record.
	GpuQueue string
	// Description is stored on the task record.
	Description string
	// BranchName is stored on the task record.
	BranchName string
	// BootFile is stored on the task record.
	BootFile string
	// Params is stored on the task record as its parameters.
	Params string
	// CommitID is stored on the task record.
	CommitID string
	// DataLocalPath is a dataset location that the benchmark handlers build
	// from the Minio attachment settings and the attachment UUID.
	// NOTE(review): no consumer of this field is visible in this chunk;
	// confirm where it is read.
	DataLocalPath string

	// BenchmarkTypeID is stored on the task record.
	BenchmarkTypeID int
	// BenchmarkChildTypeID is stored on the task record.
	BenchmarkChildTypeID int
	// ResourceSpecId is matched against the Id of the configured resource
	// specs; GenerateTask fails when no spec matches.
	ResourceSpecId int
}
func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { | |||
if !ctx.IsSigned { | |||
return false | |||
@@ -187,23 +212,23 @@ func AdminOrImageCreaterRight(ctx *context.Context) { | |||
} | |||
func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description, branchName, bootFile, params, commitID string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error { | |||
func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
dataActualPath := setting.Attachment.Minio.RealPath + | |||
setting.Attachment.Minio.Bucket + "/" + | |||
setting.Attachment.Minio.BasePath + | |||
models.AttachmentRelativePath(uuid) + | |||
uuid | |||
models.AttachmentRelativePath(req.Uuids) + | |||
req.Uuids | |||
var resourceSpec *models.ResourceSpec | |||
var versionCount int | |||
if jobType == string(models.JobTypeTrain) { | |||
if req.JobType == string(models.JobTypeTrain) { | |||
versionCount = 1 | |||
if TrainResourceSpecs == nil { | |||
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) | |||
} | |||
for _, spec := range TrainResourceSpecs.ResourceSpec { | |||
if resourceSpecId == spec.Id { | |||
if req.ResourceSpecId == spec.Id { | |||
resourceSpec = spec | |||
} | |||
} | |||
@@ -212,7 +237,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, | |||
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) | |||
} | |||
for _, spec := range ResourceSpecs.ResourceSpec { | |||
if resourceSpecId == spec.Id { | |||
if req.ResourceSpecId == spec.Id { | |||
resourceSpec = spec | |||
} | |||
} | |||
@@ -220,25 +245,25 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, | |||
} | |||
if resourceSpec == nil { | |||
log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"]) | |||
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) | |||
return errors.New("no such resourceSpec") | |||
} | |||
var datasetName string | |||
attach, err := models.GetAttachmentByUUID(uuid) | |||
attach, err := models.GetAttachmentByUUID(req.Uuids) | |||
if err != nil { | |||
//for benchmark, do not return error | |||
log.Error("GetAttachmentByUUID failed:%v", err) | |||
log.Error("GetAttachmentByUUID failed:%v", err, req.Ctx.Data["MsgID"]) | |||
} else { | |||
datasetName = attach.Name | |||
} | |||
createTime := timeutil.TimeStampNow() | |||
jobResult, err := CreateJob(jobName, models.CreateJobParams{ | |||
JobName: jobName, | |||
jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ | |||
JobName: req.JobName, | |||
RetryCount: 1, | |||
GpuType: gpuQueue, | |||
Image: image, | |||
GpuType: req.GpuQueue, | |||
Image: req.Image, | |||
TaskRoles: []models.TaskRole{ | |||
{ | |||
Name: SubTaskName, | |||
@@ -249,7 +274,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, | |||
GPUNumber: resourceSpec.GpuNum, | |||
MemoryMB: resourceSpec.MemMiB, | |||
ShmMB: resourceSpec.ShareMemMiB, | |||
Command: command, | |||
Command: req.Command, | |||
NeedIBDevice: false, | |||
IsMainRole: false, | |||
UseNNI: false, | |||
@@ -258,7 +283,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, | |||
Volumes: []models.Volume{ | |||
{ | |||
HostPath: models.StHostPath{ | |||
Path: codePath, | |||
Path: req.CodePath, | |||
MountPath: CodeMountPath, | |||
ReadOnly: false, | |||
}, | |||
@@ -272,28 +297,28 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, | |||
}, | |||
{ | |||
HostPath: models.StHostPath{ | |||
Path: modelPath, | |||
Path: req.ModelPath, | |||
MountPath: ModelMountPath, | |||
ReadOnly: false, | |||
}, | |||
}, | |||
{ | |||
HostPath: models.StHostPath{ | |||
Path: benchmarkPath, | |||
Path: req.BenchmarkPath, | |||
MountPath: BenchMarkMountPath, | |||
ReadOnly: true, | |||
}, | |||
}, | |||
{ | |||
HostPath: models.StHostPath{ | |||
Path: snn4imagenetPath, | |||
Path: req.Snn4ImageNetPath, | |||
MountPath: Snn4imagenetMountPath, | |||
ReadOnly: true, | |||
}, | |||
}, | |||
{ | |||
HostPath: models.StHostPath{ | |||
Path: brainScorePath, | |||
Path: req.BrainScorePath, | |||
MountPath: BrainScoreMountPath, | |||
ReadOnly: true, | |||
}, | |||
@@ -301,42 +326,42 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, | |||
}, | |||
}) | |||
if err != nil { | |||
log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) | |||
log.Error("CreateJob failed:", err.Error(), req.Ctx.Data["MsgID"]) | |||
return err | |||
} | |||
if jobResult.Code != Success { | |||
log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg, ctx.Data["MsgID"]) | |||
log.Error("CreateJob(%s) failed:%s", req.JobName, jobResult.Msg, req.Ctx.Data["MsgID"]) | |||
return errors.New(jobResult.Msg) | |||
} | |||
var jobID = jobResult.Payload["jobId"].(string) | |||
err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
Status: string(models.JobWaiting), | |||
UserID: ctx.User.ID, | |||
RepoID: ctx.Repo.Repository.ID, | |||
UserID: req.Ctx.User.ID, | |||
RepoID: req.Ctx.Repo.Repository.ID, | |||
JobID: jobID, | |||
JobName: jobName, | |||
DisplayJobName: displayJobName, | |||
JobName: req.JobName, | |||
DisplayJobName: req.DisplayJobName, | |||
SubTaskName: SubTaskName, | |||
JobType: jobType, | |||
JobType: req.JobType, | |||
Type: models.TypeCloudBrainOne, | |||
Uuid: uuid, | |||
Image: image, | |||
GpuQueue: gpuQueue, | |||
ResourceSpecId: resourceSpecId, | |||
Uuid: req.Uuids, | |||
Image: req.Image, | |||
GpuQueue: req.GpuQueue, | |||
ResourceSpecId: req.ResourceSpecId, | |||
ComputeResource: models.GPUResource, | |||
BenchmarkTypeID: benchmarkTypeID, | |||
BenchmarkChildTypeID: benchmarkChildTypeID, | |||
Description: description, | |||
BenchmarkTypeID: req.BenchmarkTypeID, | |||
BenchmarkChildTypeID: req.BenchmarkChildTypeID, | |||
Description: req.Description, | |||
IsLatestVersion: "1", | |||
VersionCount: versionCount, | |||
BranchName: branchName, | |||
BootFile: bootFile, | |||
BranchName: req.BranchName, | |||
BootFile: req.BootFile, | |||
DatasetName: datasetName, | |||
Parameters: params, | |||
Parameters: req.Params, | |||
CreatedUnix: createTime, | |||
UpdatedUnix: createTime, | |||
CommitID: commitID, | |||
CommitID: req.CommitID, | |||
}) | |||
if err != nil { | |||
@@ -345,17 +370,17 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, | |||
task, err := models.GetCloudbrainByJobID(jobID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByName failed: %v", err.Error()) | |||
log.Error("GetCloudbrainByJobID failed: %v", err.Error()) | |||
return err | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
if IsBenchmarkJob(jobType) { | |||
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateBenchMarkTask) | |||
} else if string(models.JobTypeTrain) == jobType { | |||
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, displayJobName, models.ActionCreateGPUTrainTask) | |||
if IsBenchmarkJob(req.JobType) { | |||
notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateBenchMarkTask) | |||
} else if string(models.JobTypeTrain) == req.JobType { | |||
notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateGPUTrainTask) | |||
} else { | |||
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugGPUTask) | |||
notification.NotifyOtherTask(req.Ctx.User, req.Ctx.Repo.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugGPUTask) | |||
} | |||
return nil | |||
@@ -207,7 +207,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
displayJobName := form.DisplayJobName | |||
jobName := util.ConvertDisplayJobNameToJobName(displayJobName) | |||
image := strings.TrimSpace(form.Image) | |||
uuid := form.Attachment | |||
uuids := form.Attachment | |||
jobType := form.JobType | |||
gpuQueue := form.GpuType | |||
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath | |||
@@ -273,6 +273,13 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
} | |||
} | |||
if err = checkDatasetLimit(uuids); err != nil { | |||
log.Error("checkDatasetLimit failed: %v", err, ctx.Data["MsgID"]) | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr("checkDatasetLimit failed", tpl, &form) | |||
return | |||
} | |||
if branchName == "" { | |||
branchName = cloudbrain.DefaultBranchName | |||
} | |||
@@ -285,11 +292,31 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) | |||
err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params, | |||
commitID, 0, 0, resourceSpecId) | |||
req := cloudbrain.GenerateCloudBrainTaskReq{ | |||
Ctx: ctx, | |||
DisplayJobName: displayJobName, | |||
JobName: jobName, | |||
Image: image, | |||
Command: command, | |||
Uuids: uuids, | |||
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), | |||
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), | |||
JobType: jobType, | |||
GpuQueue: gpuQueue, | |||
Description: form.Description, | |||
BranchName: branchName, | |||
BootFile: form.BootFile, | |||
Params: form.Params, | |||
CommitID: commitID, | |||
BenchmarkTypeID: 0, | |||
BenchmarkChildTypeID: 0, | |||
ResourceSpecId: resourceSpecId, | |||
} | |||
err = cloudbrain.GenerateTask(req) | |||
if err != nil { | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr(err.Error(), tpl, &form) | |||
@@ -1982,11 +2009,38 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
//return | |||
} | |||
err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, cloudbrain.DefaultBranchName, "", "", | |||
"", benchmarkTypeID, benchmarkChildTypeID, resourceSpecId) | |||
dataActualPath := setting.Attachment.Minio.RealPath + | |||
setting.Attachment.Minio.Bucket + "/" + | |||
setting.Attachment.Minio.BasePath + | |||
models.AttachmentRelativePath(childInfo.Attachment) + | |||
childInfo.Attachment | |||
req := cloudbrain.GenerateCloudBrainTaskReq{ | |||
Ctx: ctx, | |||
DisplayJobName: displayJobName, | |||
JobName: jobName, | |||
Image: image, | |||
Command: command, | |||
Uuids: childInfo.Attachment, | |||
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), | |||
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), | |||
JobType: string(models.JobTypeBenchmark), | |||
GpuQueue: gpuQueue, | |||
Description: form.Description, | |||
BranchName: cloudbrain.DefaultBranchName, | |||
BootFile: "", | |||
Params: "", | |||
CommitID: "", | |||
BenchmarkTypeID: benchmarkTypeID, | |||
BenchmarkChildTypeID: benchmarkChildTypeID, | |||
ResourceSpecId: resourceSpecId, | |||
DataLocalPath: dataActualPath, | |||
} | |||
err = cloudbrain.GenerateTask(req) | |||
if err != nil { | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, &form) | |||
@@ -2080,11 +2134,38 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
command = fmt.Sprintf(cloudbrain.BrainScoreCommand, getBrainRegion(benchmarkChildTypeID), displayJobName, trimSpaceNewlineInString(form.Description)) | |||
} | |||
err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params, | |||
"", 0, benchmarkChildTypeID, resourceSpecId) | |||
dataActualPath := setting.Attachment.Minio.RealPath + | |||
setting.Attachment.Minio.Bucket + "/" + | |||
setting.Attachment.Minio.BasePath + | |||
models.AttachmentRelativePath(uuid) + | |||
uuid | |||
req := cloudbrain.GenerateCloudBrainTaskReq{ | |||
Ctx: ctx, | |||
DisplayJobName: displayJobName, | |||
JobName: jobName, | |||
Image: image, | |||
Command: command, | |||
Uuids: uuid, | |||
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), | |||
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), | |||
JobType: jobType, | |||
GpuQueue: gpuQueue, | |||
Description: form.Description, | |||
BranchName: branchName, | |||
BootFile: form.BootFile, | |||
Params: form.Params, | |||
CommitID: "", | |||
BenchmarkTypeID: 0, | |||
BenchmarkChildTypeID: benchmarkChildTypeID, | |||
ResourceSpecId: resourceSpecId, | |||
DataLocalPath: dataActualPath, | |||
} | |||
err = cloudbrain.GenerateTask(req) | |||
if err != nil { | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr(err.Error(), tpl, &form) | |||
@@ -2195,3 +2276,27 @@ func GetBenchmarkTypes(ctx *context.Context) *models.BenchmarkTypes { | |||
} | |||
return benchmarkTypesMap[lang] | |||
} | |||
func checkDatasetLimit(uuidStr string) error { | |||
uuids := strings.Split(uuidStr, ";") | |||
if len(uuids) > 5 { | |||
log.Error("the dataset count(%d) exceed the limit", len(uuids)) | |||
return errors.New("the dataset count exceed the limit") | |||
} | |||
attachNames := make(map[string]string) | |||
for _, uuid := range uuids { | |||
attach, err := models.GetAttachmentByUUID(uuid) | |||
if err != nil { | |||
log.Error("GetAttachmentByUUID failed: %v", err) | |||
return err | |||
} | |||
if _, ok := attachNames[attach.Name]; ok { | |||
log.Error("the dataset name is same: %v", attach.Name) | |||
return errors.New("the dataset name is same") | |||
} | |||
attachNames[attach.Name] = attach.Name | |||
} | |||
return nil | |||
} |