diff --git a/models/cloudbrain.go b/models/cloudbrain.go
index 62bae29e2..977a4ed65 100755
--- a/models/cloudbrain.go
+++ b/models/cloudbrain.go
@@ -30,9 +30,9 @@ const (
)
const (
- NPUResource = "NPU"
- GPUResource = "CPU/GPU"
- AllResource = "all"
+ NPUResource = "NPU"
+ GPUResource = "CPU/GPU"
+ AllResource = "all"
//notebook storage category
EVSCategory = "EVS"
@@ -2173,7 +2173,18 @@ func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) {
log.Error("GetAttachmentsByUUIDs failed: %v", err)
return nil, datasetNames, err
}
- for i, attach := range attachs {
+ for i, tmpUuid := range uuids {
+ var attach *Attachment
+ for _, tmpAttach := range attachs {
+ if tmpAttach.UUID == tmpUuid {
+ attach = tmpAttach
+ break
+ }
+ }
+ if attach == nil {
+ log.Error("can not find attachment by uuid %s in query result", tmpUuid)
+ return nil, datasetNames, errors.New("can not find attachment by uuid") // NOTE(review): was returning `err`, which is nil here — caller would see a nil map with a nil error; confirm "errors" is imported in this file
+ }
fileName := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(attach.Name, ".zip"), ".tar.gz"), ".tgz")
for _, datasetInfo := range datasetInfos {
if fileName == datasetInfo.Name {
diff --git a/models/cloudbrain_image.go b/models/cloudbrain_image.go
index 71f0c2c94..9a8f9cdcb 100755
--- a/models/cloudbrain_image.go
+++ b/models/cloudbrain_image.go
@@ -518,6 +518,10 @@ func UpdateLocalImageStatus(image *Image) error {
return err
}
+func UpdateAutoIncrementIndex() {
+ x.Exec("SELECT setval('image_id_seq', (SELECT MAX(id) from image))") // NOTE(review): Exec error is ignored, and setval() is PostgreSQL-only — verify non-Postgres backends are out of scope here
+}
+
func DeleteLocalImage(id int64) error {
image := new(Image)
_, err := x.ID(id).Delete(image)
diff --git a/modules/auth/wechat/cloudbrain.go b/modules/auth/wechat/cloudbrain.go
index 5dbe0a4ea..193edd1da 100644
--- a/modules/auth/wechat/cloudbrain.go
+++ b/modules/auth/wechat/cloudbrain.go
@@ -77,7 +77,10 @@ func (CloudbrainStopMsg) ShouldSend(ctx *TemplateContext) bool {
}
for _, v := range setting.CloudbrainStoppedNotifyList {
if v == ctx.Cloudbrain.JobType {
- return true
+ if ctx.Cloudbrain.Duration > 0 && ctx.Cloudbrain.EndTime > 0 {
+ return true
+ }
+ break
}
}
return false
diff --git a/modules/auth/wechat/template.go b/modules/auth/wechat/template.go
index cd8e21f74..2ebd2667d 100644
--- a/modules/auth/wechat/template.go
+++ b/modules/auth/wechat/template.go
@@ -27,11 +27,13 @@ func SendTemplateMsg(template Template, ctx *TemplateContext, userId int64) erro
}
}()
if !template.ShouldSend(ctx) {
+ log.Info("SendTemplateMsg should not Send.jobId=%d jobType=%s", ctx.Cloudbrain.ID, ctx.Cloudbrain.JobType)
return nil
}
openId := models.GetUserWechatOpenId(userId)
if openId == "" {
+ log.Error("Wechat openId not exist,userId=%d", userId)
return errors.New("Wechat openId not exist")
}
req := TemplateMsgRequest{
@@ -56,5 +58,6 @@ func SendTemplateMsg(template Template, ctx *TemplateContext, userId int64) erro
log.Error("SendTemplateMsg err. %v", err)
return err
}
+ log.Info("SendTemplateMsg success")
return nil
}
diff --git a/modules/cloudbrain/resty.go b/modules/cloudbrain/resty.go
index 7b714c4b5..a7a19ed10 100755
--- a/modules/cloudbrain/resty.go
+++ b/modules/cloudbrain/resty.go
@@ -312,6 +312,7 @@ sendjob:
}
err = models.WithTx(func(ctx models.DBContext) error {
+ models.UpdateAutoIncrementIndex() // NOTE(review): runs on the global engine, not the tx ctx — executes outside this transaction; confirm intended
if dbImage != nil {
dbImage.IsPrivate = params.IsPrivate
dbImage.Description = params.ImageDescription
diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go
index caaae4e8e..0d84d7aa7 100755
--- a/modules/grampus/grampus.go
+++ b/modules/grampus/grampus.go
@@ -23,9 +23,7 @@ const (
NpuWorkDir = "/cache/"
CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" +
- "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;"
- //CommandPrepareScript = "pwd;cd /cache;mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" +
- // "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;"
+ "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;"
CodeArchiveName = "master.zip"
)
diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go
index 9a6ea0574..9e8447978 100755
--- a/modules/modelarts/modelarts.go
+++ b/modules/modelarts/modelarts.go
@@ -134,6 +134,7 @@ type GenerateInferenceJobReq struct {
ModelVersion string
CkptName string
ResultUrl string
+ DatasetName string
}
type VersionInfo struct {
@@ -690,11 +691,11 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
return err
}
- attach, err := models.GetAttachmentByUUID(req.Uuid)
- if err != nil {
- log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
- return err
- }
+ // attach, err := models.GetAttachmentByUUID(req.Uuid)
+ // if err != nil {
+ // log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
+ // return err
+ // }
jobID := strconv.FormatInt(jobResult.JobID, 10)
err = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
@@ -708,7 +709,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
VersionID: jobResult.VersionID,
VersionName: jobResult.VersionName,
Uuid: req.Uuid,
- DatasetName: attach.Name,
+ DatasetName: req.DatasetName,
CommitID: req.CommitID,
EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
diff --git a/modules/notification/wechat/wechat.go b/modules/notification/wechat/wechat.go
index 81c697fc9..cd72bb54e 100644
--- a/modules/notification/wechat/wechat.go
+++ b/modules/notification/wechat/wechat.go
@@ -7,6 +7,7 @@ package wechat
import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth/wechat"
+ "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification/base"
)
@@ -24,8 +25,10 @@ func NewNotifier() base.Notifier {
}
func (*wechatNotifier) NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) {
+ log.Info("NotifyChangeCloudbrainStatus cloudbrain.id=%d cloudbrain.status=%s oldStatus=%s", cloudbrain.ID, cloudbrain.Status, oldStatus)
operateType := wechat.GetJobOperateTypeFromCloudbrainStatus(cloudbrain)
if operateType == "" {
+ log.Info("NotifyChangeCloudbrainStatus operateType is incorrect")
return
}
template := wechat.GetTemplateFromOperateType(operateType)
diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini
index 974b03b66..90d0348bb 100755
--- a/options/locale/locale_en-US.ini
+++ b/options/locale/locale_en-US.ini
@@ -1099,7 +1099,8 @@ modelarts.status=Status
modelarts.createtime=CreateTime
modelarts.version_nums = Version Nums
modelarts.version = Version
-modelarts.computing_resources=compute Resources
+modelarts.computing_resources=Compute Resources
+modelarts.cluster.computing_resources=Cluster/Compute Resources
modelarts.ai_center=Ai Center
modelarts.card_type=Card Type
modelarts.cluster=Cluster
@@ -2990,15 +2991,15 @@ mirror_sync_delete = synced and deleted reference %[2]s
at %s#%[2]s`
reject_pull_request = `suggested changes for %s#%[2]s`
upload_dataset=`upload dataset %s`
-task_gpudebugjob=`created CPU/GPU type debugging task%s`
+task_gpudebugjob=`created CPU/GPU type debugging task %s`
task_npudebugjob=`created NPU type debugging task %s`
-task_nputrainjob=`created NPU training task%s`
+task_nputrainjob=`created NPU training task %s`
task_inferencejob=`created reasoning task %s`
task_benchmark=`created profiling task %s`
task_createmodel=`created new model %s`
-task_gputrainjob=`created CPU/GPU training task%s`
-task_c2netnputrainjob=`created NPU training task%s`
-task_c2netgputrainjob=`created CPU/GPU training task%s`
+task_gputrainjob=`created CPU/GPU training task %s`
+task_c2netnputrainjob=`created NPU training task %s`
+task_c2netgputrainjob=`created CPU/GPU training task %s`
[tool]
ago = %s ago
@@ -3092,6 +3093,8 @@ all_ai_center=All Computing NET
resource_cluster = Resource Cluster
resource_cluster_openi = OpenI Resource Cluster
resource_cluster_c2net = China Computing NET
+resource_cluster_openi_simple = OpenI
+resource_cluster_c2net_simple = Computing NET
compute_resource = Computing resources
task_name = Task name
task_type = Task type
diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini
index 39edb2da5..4c38aefcc 100755
--- a/options/locale/locale_zh-CN.ini
+++ b/options/locale/locale_zh-CN.ini
@@ -1110,6 +1110,7 @@ modelarts.deletetime=删除时间
modelarts.version_nums=版本数
modelarts.version=版本
modelarts.computing_resources=计算资源
+modelarts.cluster.computing_resources=集群/计算资源
modelarts.ai_center=智算中心
modelarts.card_type=卡类型
modelarts.cluster=集群
@@ -3106,6 +3107,8 @@ all_ai_center=全部智算中心
resource_cluster = 算力集群
resource_cluster_openi = 启智集群
resource_cluster_c2net = 智算网络集群
+resource_cluster_openi_simple = 启智
+resource_cluster_c2net_simple = 智算网络
compute_resource = 计算资源
task_name = 任务名称
task_type = 任务类型
diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index aa51c6e1a..0b941b400 100755
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -916,6 +916,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/cloudbrain", func() {
m.Get("/:id", repo.GetCloudbrainTask)
m.Get("/:id/log", repo.CloudbrainGetLog)
+ m.Get("/:id/download_log_file", repo.CloudbrainDownloadLogFile)
m.Group("/train-job", func() {
m.Group("/:jobid", func() {
m.Get("", repo.GetModelArtsTrainJobVersion)
diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go
index c3a803f70..a8c5762e4 100755
--- a/routers/api/v1/repo/cloudbrain.go
+++ b/routers/api/v1/repo/cloudbrain.go
@@ -6,13 +6,17 @@
package repo
import (
- "code.gitea.io/gitea/modules/notification"
+ "bufio"
"encoding/json"
+ "io"
"net/http"
+ "os"
"sort"
"strings"
"time"
+ "code.gitea.io/gitea/modules/notification"
+
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/models"
@@ -366,7 +370,7 @@ func CloudbrainForModelConvertGetLog(ctx *context.Context) {
ctx.JSON(http.StatusOK, result)
}
-func CloudbrainGetLog(ctx *context.Context) {
+func CloudbrainDownloadLogFile(ctx *context.Context) {
ID := ctx.Params(":id")
job, err := models.GetCloudbrainByID(ID)
if err != nil {
@@ -374,14 +378,124 @@ func CloudbrainGetLog(ctx *context.Context) {
ctx.ServerError(err.Error(), err)
return
}
+ prefix := "/" + setting.CBCodePathPrefix + job.JobName + "/model"
+ files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
+ if err != nil {
+ log.Error("query cloudbrain model failed: %v", err)
+ return
+ }
+ fileName := ""
+ for _, file := range files {
+ if strings.HasSuffix(file.FileName, "log.txt") {
+ fileName = file.FileName
+ break
+ }
+ }
+ if fileName != "" { // NOTE(review): when no log file is found the handler falls through with no response/status — consider an explicit 404
+ url, err := storage.Attachments.PresignedGetURL(prefix+"/"+fileName, fileName)
+ if err != nil {
+ log.Error("Get minio get SignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
+ ctx.ServerError("Get minio get SignedUrl failed", err)
+ return
+ }
+ http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusTemporaryRedirect) // presigned URLs expire; 301 would be cached by browsers and replayed after expiry
+ }
+}
- result := CloudbrainGetLogByJobId(job.JobID, job.JobName)
+func CloudbrainGetLog(ctx *context.Context) {
+ ID := ctx.Params(":id")
+ startLine := ctx.QueryInt("base_line")
+ lines := ctx.QueryInt("lines")
+ endLine := startLine + lines
+ order := ctx.Query("order")
+ if order == "asc" {
+ endLine = startLine
+ startLine = endLine - lines
+ if startLine < 0 {
+ startLine = 0
+ }
+ }
+ job, err := models.GetCloudbrainByID(ID)
+ if err != nil {
+ log.Error("GetCloudbrainByID failed: %v", err, ctx.Data["MsgID"])
+ ctx.ServerError(err.Error(), err)
+ return
+ }
+ result := getLogFromModelDir(job.JobName, startLine, endLine)
if result == nil {
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
- ctx.JSON(http.StatusOK, result)
+
+ re := map[string]interface{}{
+ "JobID": ID,
+ "LogFileName": result["FileName"],
+ "StartLine": startLine,
+ "EndLine": result["endLine"],
+ "Content": result["Content"],
+ "Lines": result["lines"],
+ "CanLogDownload": result["FileName"] != "",
+ }
+ //result := CloudbrainGetLogByJobId(job.JobID, job.JobName)
+
+ ctx.JSON(http.StatusOK, re)
+}
+
+func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]interface{} {
+ prefix := "/" + setting.CBCodePathPrefix + jobName + "/model"
+ files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
+ if err != nil {
+ log.Error("query cloudbrain model failed: %v", err)
+ return nil
+ }
+
+ re := ""
+ fileName := ""
+ count := 0
+ fileEndLine := endLine
+ for _, file := range files {
+ if strings.HasSuffix(file.FileName, "log.txt") {
+ fileName = file.FileName
+ path := storage.GetMinioPath(jobName+"/model/", file.FileName)
+ log.Info("path=" + path)
+ reader, err := os.Open(path)
+ if err == nil {
+ defer reader.Close()
+ r := bufio.NewReader(reader)
+ for i := 0; i < endLine; i++ {
+ line, error := r.ReadString('\n')
+ log.Info("line=" + line)
+ fileEndLine = i
+ if error == io.EOF {
+ log.Info("read file completed.")
+ break
+ }
+ if error != nil {
+ log.Info("read file error." + error.Error())
+ break
+ }
+ if error == nil {
+ if i >= startLine {
+ re = re + line
+ count++
+ }
+ }
+ }
+ } else {
+ log.Info("error:" + err.Error())
+ }
+ break
+ }
+ }
+
+ return map[string]interface{}{
+ "JobName": jobName,
+ "Content": re,
+ "FileName": fileName,
+ "lines": count,
+ "endLine": fileEndLine,
+ }
}
func CloudBrainModelConvertList(ctx *context.APIContext) {
diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go
index 3294efdd1..419c6d6a4 100755
--- a/routers/api/v1/repo/modelarts.go
+++ b/routers/api/v1/repo/modelarts.go
@@ -154,6 +154,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
if job.StartTime == 0 && result.JobInfo.StartedAt > 0 {
job.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
}
+ oldStatus := job.Status
job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
job.Duration = result.JobInfo.RunSec
job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
@@ -176,6 +177,9 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
aiCenterName = temp[1]
}
}
+ if oldStatus != job.Status {
+ notification.NotifyChangeCloudbrainStatus(job, oldStatus)
+ }
err = models.UpdateTrainJobVersion(job)
if err != nil {
log.Error("UpdateJob failed:", err)
diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go
index cdde7596c..abc2027d3 100755
--- a/routers/repo/grampus.go
+++ b/routers/repo/grampus.go
@@ -767,10 +767,6 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
command += commandDownload
}
- //check download result
- commandCheckRes := "bash -c \"[[ $? -eq 0 ]] && exit 0 || exit -1;\";"
- command += commandCheckRes
-
//unzip code & dataset
toolUnzip := "unzip -q '"
if strings.HasSuffix(datasetName, ".tar.gz") {
@@ -779,16 +775,22 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + "';"
command += commandUnzip
- //check unzip result
- commandCheckRes = "bash -c \"[[ $? -eq 0 ]] && exit 0 || exit -1;\";"
- command += commandCheckRes
-
command += "echo \"unzip finished;start to exec code;\";"
+ // set export
+ var commandExport string
+ if processorType == grampus.ProcessorTypeNPU {
+ commandExport = "export bucket=" + setting.Bucket + " && export remote_path=" + outputRemotePath + ";"
+ } else if processorType == grampus.ProcessorTypeGPU {
+ commandExport = "export env=" + setting.Grampus.Env + " && export remote_path=" + outputRemotePath + ";"
+ }
+
+ command += commandExport
+
//exec code
var parameters models.Parameters
var paramCode string
- param := make([]models.Parameter, 0)
+
if len(paramSrc) != 0 {
err := json.Unmarshal([]byte(paramSrc), ¶meters)
if err != nil {
@@ -797,10 +799,6 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
}
for _, parameter := range parameters.Parameter {
- param = append(param, models.Parameter{
- Label: parameter.Label,
- Value: parameter.Value,
- })
paramCode += " --" + parameter.Label + "=" + parameter.Value
}
}
@@ -820,15 +818,15 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
//upload models
if processorType == grampus.ProcessorTypeNPU {
- commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + workDir + "output/;"
+ commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_npu " + setting.Bucket + " " + outputRemotePath + " " + workDir + "output/;"
command += commandUpload
} else if processorType == grampus.ProcessorTypeGPU {
- commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + workDir + "output/;"
+ commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_gpu " + setting.Grampus.Env + " " + outputRemotePath + " " + workDir + "output/;"
command += commandUpload
}
//check exec result
- commandCheckRes = "bash -c \"[[ $result -eq 0 ]] && exit 0 || exit -1\""
+ commandCheckRes := "bash -c \"[[ $result -eq 0 ]] && exit 0 || exit -1\""
command += commandCheckRes
return command, nil
diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go
index 763308930..697e6591e 100755
--- a/routers/repo/modelarts.go
+++ b/routers/repo/modelarts.go
@@ -896,12 +896,12 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
ctx.Data["display_job_name"] = task.DisplayJobName
ctx.Data["job_name"] = task.JobName
- attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID)
- if err != nil {
- ctx.ServerError("GetAllUserAttachments failed:", err)
- return err
- }
- ctx.Data["attachments"] = attachs
+ // attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID)
+ // if err != nil {
+ // ctx.ServerError("GetAllUserAttachments failed:", err)
+ // return err
+ // }
+ // ctx.Data["attachments"] = attachs
var resourcePools modelarts.ResourcePool
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
@@ -945,12 +945,16 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
ctx.ServerError("GetBranches error:", err)
return err
}
-
+ _, _, datasetNames, _, err := getDatasUrlListByUUIDS(task.Uuid)
+ if err != nil {
+ ctx.ServerError("GetAllUserAttachments failed:", err)
+ return err
+ }
ctx.Data["branches"] = branches
ctx.Data["branch_name"] = task.BranchName
ctx.Data["description"] = task.Description
ctx.Data["boot_file"] = task.BootFile
- ctx.Data["dataset_name"] = task.DatasetName
+ ctx.Data["dataset_name"] = datasetNames
ctx.Data["work_server_number"] = task.WorkServerNumber
ctx.Data["flavor_name"] = task.FlavorName
ctx.Data["engine_name"] = task.EngineName
@@ -1972,7 +1976,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath + VersionOutputPath + "/"
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/"
- dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
+ //dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
branchName := form.BranchName
FlavorName := form.FlavorName
EngineName := form.EngineName
@@ -2050,6 +2054,13 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
gitRepo, _ := git.OpenRepository(repo.RepoPath())
commitID, _ := gitRepo.GetBranchCommitID(branchName)
+ _, dataUrl, datasetNames, _, err := getDatasUrlListByUUIDS(uuid)
+ if err != nil {
+ inferenceJobErrorNewDataPrepare(ctx, form)
+ ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
+ return
+ }
+
if err := downloadCode(repo, codeLocalPath, branchName); err != nil {
log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err)
inferenceJobErrorNewDataPrepare(ctx, form)
@@ -2120,7 +2131,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
req := &modelarts.GenerateInferenceJobReq{
JobName: jobName,
DisplayJobName: displayJobName,
- DataUrl: dataPath,
+ DataUrl: dataUrl,
Description: description,
CodeObsPath: codeObsPath,
BootFileUrl: codeObsPath + bootFile,
@@ -2146,6 +2157,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ModelVersion: modelVersion,
CkptName: ckptName,
ResultUrl: resultObsPath,
+ DatasetName: datasetNames,
}
err = modelarts.GenerateInferenceJob(ctx, req)
@@ -2700,11 +2712,23 @@ func getDatasUrlListByUUIDS(uuidStr string) ([]models.Datasurl, string, string,
datasetInfos := make(map[string]models.DatasetInfo)
attachs, err := models.GetAttachmentsByUUIDs(uuids)
- if err != nil {
+ if err != nil || len(attachs) != len(uuids) {
log.Error("GetAttachmentsByUUIDs failed: %v", err)
return datasUrlList, dataUrl, datasetNames, isMultiDataset, errors.New("GetAttachmentsByUUIDs failed")
}
- for i, attach := range attachs {
+
+ for i, tmpUuid := range uuids {
+ var attach *models.Attachment
+ for _, tmpAttach := range attachs {
+ if tmpAttach.UUID == tmpUuid {
+ attach = tmpAttach
+ break
+ }
+ }
+ if attach == nil {
+ log.Error("can not find attachment by uuid %s in query result", tmpUuid)
+ return datasUrlList, dataUrl, datasetNames, isMultiDataset, errors.New("GetAttachmentsByUUIDs failed")
+ }
fileName := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(attach.Name, ".zip"), ".tar.gz"), ".tgz")
for _, datasetInfo := range datasetInfos {
if fileName == datasetInfo.Name {
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index a4bc09472..41d34b937 100755
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -1100,7 +1100,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainTrainJobDel)
//m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels)
m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainDownloadModel)
- //m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobNewVersion)
+ //m.Get("/get_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo.GetLogFromModelDir) // NOTE(review): left commented out, but trainjob/show.tmpl now requests .../train-job/{id}/get_log — verify an active route serves it
//m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion)
})
m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.CloudBrainTrainJobNew)
diff --git a/templates/repo/cloudbrain/inference/new.tmpl b/templates/repo/cloudbrain/inference/new.tmpl
index fc2c37bc7..3ca9684a7 100644
--- a/templates/repo/cloudbrain/inference/new.tmpl
+++ b/templates/repo/cloudbrain/inference/new.tmpl
@@ -186,8 +186,10 @@
{{end}}
-
- {{template "custom/select_dataset_train" .}}
+
+
+
+
diff --git a/templates/repo/cloudbrain/inference/show.tmpl b/templates/repo/cloudbrain/inference/show.tmpl
index 2e8c9f063..97a0b4936 100644
--- a/templates/repo/cloudbrain/inference/show.tmpl
+++ b/templates/repo/cloudbrain/inference/show.tmpl
@@ -268,7 +268,7 @@
{{$.i18n.Tr "repo.model_download"}}
-
+
@@ -461,19 +461,7 @@
-
-
- {{$.i18n.Tr "repo.modelarts.infer_dataset"}}
- |
-
-
-
- {{range $m ,$n := $.datasetDownload}}
- {{.DatasetName}}
- {{end}}
-
- |
-
+
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
@@ -504,6 +492,22 @@
+
+
+
+ 数据集文件 |
+
+
+ {{range $m ,$n := $.datasetDownload}}
+
+ {{.DatasetName}} |
+
+
+ {{end}}
+
+
+
+
diff --git a/templates/repo/cloudbrain/show.tmpl b/templates/repo/cloudbrain/show.tmpl
index 4d4072a69..ccd8e2835 100755
--- a/templates/repo/cloudbrain/show.tmpl
+++ b/templates/repo/cloudbrain/show.tmpl
@@ -364,18 +364,7 @@
|
-
-
- {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
- |
-
-
-
- {{$.duration}}
-
- |
-
+
@@ -404,20 +393,7 @@
-
-
- {{$.i18n.Tr "repo.modelarts.train_job.dataset"}}
- |
-
-
-
- {{range $m ,$n := $.datasetDownload}}
- {{.DatasetName}}
- {{end}}
-
- |
-
+
@@ -496,14 +472,40 @@
|
+
+
+ {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
+ |
+
+
+ {{$.duration}}
+
+ |
+
-
+
+
+
+ 数据集文件 |
+
+
+ {{range $m ,$n := $.datasetDownload}}
+
+ {{.DatasetName}} |
+
+
+ {{end}}
+
+
+
+
diff --git a/templates/repo/cloudbrain/trainjob/show.tmpl b/templates/repo/cloudbrain/trainjob/show.tmpl
index e1199db5f..9d110e8fd 100644
--- a/templates/repo/cloudbrain/trainjob/show.tmpl
+++ b/templates/repo/cloudbrain/trainjob/show.tmpl
@@ -286,10 +286,14 @@
@@ -423,19 +427,7 @@
-
-
- {{$.i18n.Tr "repo.modelarts.train_job.train_dataset"}}
- |
-
-
-
- {{range $m ,$n := $.datasetDownload}}
- {{.DatasetName}}
- {{end}}
-
- |
-
+
@@ -464,6 +456,22 @@
+
+
+
+ 数据集文件 |
+
+
+ {{range $m ,$n := $.datasetDownload}}
+
+ {{.DatasetName}} |
+
+
+ {{end}}
+
+
+
+
@@ -474,7 +482,7 @@
-
@@ -488,18 +496,41 @@
-
@@ -870,7 +901,15 @@
function loadLog(version_name) {
document.getElementById("mask").style.display = "block"
- $.get(`/api/v1/repos/${userName}/${repoPath}/cloudbrain/${taskID}/log?version_name=${version_name}&lines=50&order=asc`, (data) => {
+ let startLine = $('input[name=end_line]').val();
+ if(startLine==""){
+ startLine=0;
+ }
+ let endLine = $('input[name=end_line]').val(); // NOTE(review): reads the same end_line input as startLine above — once end_line is set, startLine === endLine and the requested range is empty; confirm intended field
+ if(endLine==""){
+ endLine = 50;
+ }
+ $.get(`/${userName}/${repoPath}/cloudbrain/train-job/${jobID}/get_log?endLine=${endLine}&startLine=${startLine}`, (data) => { // NOTE(review): CloudbrainGetLog reads base_line/lines/order, not startLine/endLine — query keys mismatch the handler; verify route and params
$('input[name=end_line]').val(data.EndLine)
$('input[name=start_line]').val(data.StartLine)
$(`#log_file${version_name}`).text(data.Content)
diff --git a/templates/repo/debugjob/index.tmpl b/templates/repo/debugjob/index.tmpl
index 2756b49fd..912a615c2 100755
--- a/templates/repo/debugjob/index.tmpl
+++ b/templates/repo/debugjob/index.tmpl
@@ -235,7 +235,8 @@
{{.ComputeResource}}
+ class="">
+ {{.ComputeResource}}
{{if .User.Name}}
diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl
index 0e0c80a45..fa82ad5d8 100644
--- a/templates/repo/modelarts/inferencejob/new.tmpl
+++ b/templates/repo/modelarts/inferencejob/new.tmpl
@@ -40,6 +40,7 @@
{{template "repo/header" .}}
{{template "base/alert" .}}
+
@@ -195,8 +196,11 @@
{{end}}
-
- {{template "custom/select_dataset_train" .}}
+
+
+
+
+
diff --git a/templates/repo/modelarts/inferencejob/show.tmpl b/templates/repo/modelarts/inferencejob/show.tmpl
index 39be33aae..628615879 100644
--- a/templates/repo/modelarts/inferencejob/show.tmpl
+++ b/templates/repo/modelarts/inferencejob/show.tmpl
@@ -217,7 +217,7 @@ td, th {
{{$.i18n.Tr "repo.model_download"}}
-
+
@@ -402,19 +402,7 @@ td, th {
|
-
-
- {{$.i18n.Tr "repo.modelarts.infer_dataset"}}
- |
-
-
-
- {{range $m ,$n := $.datasetDownload}}
- {{.DatasetName}}
- {{end}}
-
- |
-
+
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
@@ -445,6 +433,22 @@ td, th {
+
+
+
+ 数据集文件 |
+
+
+ {{range $m ,$n := $.datasetDownload}}
+
+ {{.DatasetName}} |
+
+
+ {{end}}
+
+
+
+
diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl
index 46c1b9a02..42c59ba4b 100755
--- a/templates/repo/modelarts/trainjob/index.tmpl
+++ b/templates/repo/modelarts/trainjob/index.tmpl
@@ -95,7 +95,7 @@
{{$.i18n.Tr "repo.cloudbrain_status_runtime"}}
- {{$.i18n.Tr "repo.modelarts.computing_resources"}}
+ {{$.i18n.Tr "repo.modelarts.cluster.computing_resources"}}
{{$.i18n.Tr "repo.cloudbrain_creator"}}
@@ -137,7 +137,13 @@
- {{.ComputeResource}}
+
+ {{if eq .Cloudbrain.Type 2}}
+ {{$.i18n.Tr "cloudbrain.resource_cluster_c2net_simple"}}
+ {{else}}
+ {{$.i18n.Tr "cloudbrain.resource_cluster_openi_simple"}}
+ {{end}}
+ {{.ComputeResource}}
diff --git a/templates/repo/modelarts/trainjob/show.tmpl b/templates/repo/modelarts/trainjob/show.tmpl
index e5873d540..e780b679f 100755
--- a/templates/repo/modelarts/trainjob/show.tmpl
+++ b/templates/repo/modelarts/trainjob/show.tmpl
@@ -397,16 +397,7 @@
|
-
-
- {{$.i18n.Tr "repo.modelarts.train_job.compute_node"}}
- |
-
-
- {{.WorkServerNumber}}
-
- |
-
+
@@ -445,23 +436,7 @@
-
-
- {{$.i18n.Tr "repo.modelarts.train_job.train_dataset"}}
- |
-
-
-
- {{range $m ,$n := $.datasetList}}
- {{if eq $k $m}}
- {{range $f ,$g := $n}}
- {{.DatasetName}}
- {{end}}
- {{end}}
- {{end}}
-
- |
-
+
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
@@ -486,10 +461,38 @@
|
+
+
+ {{$.i18n.Tr "repo.modelarts.train_job.compute_node"}}
+ |
+
+
+ {{.WorkServerNumber}}
+
+ |
+
+
+
+
+ 数据集文件 |
+
+
+ {{range $m ,$n := $.datasetList}}
+ {{if eq $k $m}}
+ {{range $f ,$g := $n}}
+
+ {{.DatasetName}} |
+
+ {{end}}
+ {{end}}
+ {{end}}
+
+
+
diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl
index 6712f5e7d..cf32409ed 100644
--- a/templates/repo/modelarts/trainjob/version_new.tmpl
+++ b/templates/repo/modelarts/trainjob/version_new.tmpl
@@ -55,6 +55,7 @@
{{template "repo/header" .}}
+
{{template "base/alert" .}}
-
+
+
+
+
+
{{.i18n.Tr "cloudbrain.dataset_path_rule"}}
{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}
diff --git a/web_src/js/components/dataset/selectDataset.vue b/web_src/js/components/dataset/selectDataset.vue
index b36b90cab..02343516c 100755
--- a/web_src/js/components/dataset/selectDataset.vue
+++ b/web_src/js/components/dataset/selectDataset.vue
@@ -586,6 +586,8 @@ export default {
confirmFlag: false,
saveStatusList: [],
+ // 初始化已选择的数据集列表
+ hasSelectDatasetList: [],
//当前项目数据集页面配置的初始化
initCurrentPage: 1,
totalNumCurrent: 0,
@@ -644,6 +646,7 @@ export default {
//tree 勾选触发事件
onCheck(data, checkedInfo) {
+ this.hasSelectDatasetList = [];
if (
this.selectDatasetArray.length === 0 ||
this.selectDatasetArray.every((item) => item.id !== data.id)
@@ -671,9 +674,8 @@ export default {
return item.label;
});
this.saveStatusList = this.selectDatasetArray.map((item) => {
- return item.UUID;
+ return item.id;
});
- // this.confirmDatasetList = this.saveStatusList.join(";");
},
//已选择数据集checkbox group 勾选事件
changeCheckbox(checked, data) {
@@ -686,7 +688,6 @@ export default {
});
this.selectDatasetArray.splice(index, 1);
this.saveStatusList.splice(index, 1);
- // this.confirmDatasetList = this.saveStatusList.join(";");
},
tableHeaderStyle({ row, column, rowIndex, columnIndex }) {
if (rowIndex === 0) {
@@ -722,6 +723,7 @@ export default {
.then((res) => {
this.loadingCurrent = false;
let data = JSON.parse(res.data.data);
+ // console.log(data); // debug logging disabled for review
this.currentDatasetList = this.transformeTreeData(
data,
"currentTree",
@@ -734,7 +736,10 @@ export default {
let setCheckedKeysList = this.currentDatasetList.reduce(
(pre, cur) => {
cur.Attachments.forEach((item) => {
- if (this.saveStatusList.includes(item.id)) {
+ if (
+ this.saveStatusList.includes(item.id) ||
+ this.hasSelectDatasetList.includes(item.id)
+ ) {
pre.push(item.id);
}
});
@@ -957,10 +962,32 @@ export default {
mounted() {
this.type = $(".cloudbrain-type").data("cloudbrain-type");
this.repoLink = $(".cloudbrain-type").data("repo-link");
+ if ($(".cloudbrain-type").data("dataset-uuid")) {
+ this.hasSelectDatasetList = $(".cloudbrain-type")
+ .data("dataset-uuid")
+ .split(";");
+ let hasSelectDatasetName = $(".cloudbrain-type")
+ .data("dataset-name")
+ .split(";");
+ if (this.hasSelectDatasetList.length !== 0) {
+ this.saveStatusList = this.hasSelectDatasetList;
+ this.checkList = hasSelectDatasetName;
+ this.hasSelectDatasetList.forEach((item, index) => {
+ this.selectDatasetArray.push({
+ id: item,
+ label: hasSelectDatasetName[index],
+ });
+ });
+ }
+ this.confirmDataset();
+ }
+
if (
location.href.indexOf("benchmark") !== -1 ||
- location.href.indexOf("train-job") !== -1
+ location.href.indexOf("train-job") !== -1 ||
+ location.href.indexOf("inference") !== -1
) {
+ // console.log("this.benchmarkNew"); // debug logging disabled for review
this.benchmarkNew = true;
}
if (
diff --git a/web_src/less/_dashboard.less b/web_src/less/_dashboard.less
index c78179d45..3b41664ad 100644
--- a/web_src/less/_dashboard.less
+++ b/web_src/less/_dashboard.less
@@ -100,7 +100,7 @@
}
.issue.title {
- width: 80%;
+ width: 100%;
}
.push.news .content ul {