|
|
@@ -17,6 +17,7 @@ import ( |
|
|
|
"code.gitea.io/gitea/modules/log" |
|
|
|
"code.gitea.io/gitea/modules/modelarts" |
|
|
|
"code.gitea.io/gitea/modules/setting" |
|
|
|
"code.gitea.io/gitea/modules/timeutil" |
|
|
|
uuid "github.com/satori/go.uuid" |
|
|
|
) |
|
|
|
|
|
|
@@ -35,6 +36,7 @@ const ( |
|
|
|
GpuQueue = "openidgx" |
|
|
|
Success = "S000" |
|
|
|
GPU_PYTORCH_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap" |
|
|
|
GPU_TENSORFLOW_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tf2onnx" |
|
|
|
|
|
|
|
PytorchBootFile = "convert_pytorch.py" |
|
|
|
MindsporeBootFile = "convert_mindspore.py" |
|
|
@@ -267,9 +269,11 @@ func downloadConvertCode(repopath string, codePath, branchName string) error { |
|
|
|
|
|
|
|
func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) error { |
|
|
|
command := "" |
|
|
|
IMAGE_URL := GPU_PYTORCH_IMAGE |
|
|
|
if modelConvert.SrcEngine == PYTORCH_ENGINE { |
|
|
|
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, PytorchBootFile) |
|
|
|
} else if modelConvert.SrcEngine == TENSORFLOW_ENGINE { |
|
|
|
IMAGE_URL = GPU_TENSORFLOW_IMAGE |
|
|
|
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, TensorFlowGpuBootFile) |
|
|
|
} |
|
|
|
log.Info("command=" + command) |
|
|
@@ -301,7 +305,7 @@ func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context |
|
|
|
JobName: modelConvert.ID, |
|
|
|
RetryCount: 1, |
|
|
|
GpuType: GpuQueue, |
|
|
|
Image: GPU_PYTORCH_IMAGE, |
|
|
|
Image: IMAGE_URL, |
|
|
|
TaskRoles: []models.TaskRole{ |
|
|
|
{ |
|
|
|
Name: SubTaskName, |
|
|
@@ -397,6 +401,32 @@ func isCloudBrainTask(task *models.AiModelConvert) bool { |
|
|
|
func StopModelConvert(ctx *context.Context) { |
|
|
|
id := ctx.Params(":id") |
|
|
|
log.Info("stop model convert start.id=" + id) |
|
|
|
job, err := models.QueryModelConvertById(ctx.Query("ID")) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("Not found task.", err) |
|
|
|
return |
|
|
|
} |
|
|
|
if isCloudBrainTask(job) { |
|
|
|
err = cloudbrain.StopJob(job.CloudBrainTaskId) |
|
|
|
if err != nil { |
|
|
|
log.Error("Stop cloudbrain Job(%s) failed:%v", job.CloudBrainTaskId, err) |
|
|
|
} |
|
|
|
} else { |
|
|
|
_, err = modelarts.StopTrainJob(job.CloudBrainTaskId, job.ModelArtsVersionId) |
|
|
|
if err != nil { |
|
|
|
log.Error("Stop modelarts Job(%s) failed:%v", job.CloudBrainTaskId, err) |
|
|
|
} |
|
|
|
} |
|
|
|
job.Status = string(models.JobStopped) |
|
|
|
if job.EndTime == 0 { |
|
|
|
job.EndTime = timeutil.TimeStampNow() |
|
|
|
} |
|
|
|
models.ModelConvertSetDuration(job) |
|
|
|
err = models.UpdateModelConvert(job) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateModelConvert failed:", err) |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
func ShowModelConvertInfo(ctx *context.Context) { |
|
|
|