';
diff --git a/routers/admin/cloudbrains.go b/routers/admin/cloudbrains.go
index 0481e6743..8cfe10795 100755
--- a/routers/admin/cloudbrains.go
+++ b/routers/admin/cloudbrains.go
@@ -43,12 +43,6 @@ func CloudBrains(ctx *context.Context) {
if page <= 0 {
page = 1
}
- debugType := models.TypeCloudBrainAll
- if listType == models.GPUResource {
- debugType = models.TypeCloudBrainOne
- } else if listType == models.NPUResource {
- debugType = models.TypeCloudBrainTwo
- }
var jobTypes []string
jobTypeNot := false
@@ -77,13 +71,14 @@ func CloudBrains(ctx *context.Context) {
PageSize: setting.UI.IssuePagingNum,
},
Keyword: keyword,
- Type: debugType,
JobTypeNot: jobTypeNot,
JobStatusNot: jobStatusNot,
JobStatus: jobStatuses,
JobTypes: jobTypes,
NeedRepoInfo: true,
IsLatestVersion: modelarts.IsLatestVersion,
+ ComputeResource: listType,
+ Type: models.TypeCloudBrainAll,
})
if err != nil {
ctx.ServerError("Get job failed:", err)
diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index 0c280b0cb..e6c572e73 100755
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -947,6 +947,15 @@ func RegisterRoutes(m *macaron.Macaron) {
})
})
}, reqRepoReader(models.UnitTypeCloudBrain))
+ m.Group("/grampus", func() {
+ m.Group("/train-job", func() {
+ m.Group("/:jobid", func() {
+ m.Get("", repo.GetModelArtsTrainJobVersion)
+ m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.GrampusStopJob)
+ m.Get("/log", repo_ext.GrampusGetLog)
+ })
+ })
+ }, reqRepoReader(models.UnitTypeCloudBrain))
}, repoAssignment())
})
@@ -1046,6 +1055,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Get("/prd/event", authentication.ValidEventSource)
m.Post("/prd/event", authentication.AcceptWechatEvent)
})
+ m.Get("/wechat/material", authentication.GetMaterial)
}, securityHeaders(), context.APIContexter(), sudo())
}
diff --git a/routers/api/v1/repo/cloudbrain_dashboard.go b/routers/api/v1/repo/cloudbrain_dashboard.go
old mode 100644
new mode 100755
index f102a0f05..cc125c97f
--- a/routers/api/v1/repo/cloudbrain_dashboard.go
+++ b/routers/api/v1/repo/cloudbrain_dashboard.go
@@ -103,7 +103,7 @@ func GetAllCloudbrainsOverview(ctx *context.Context) {
if cloudbrain.Cloudbrain.Type == models.TypeCloudBrainTwo {
cloudBrainTwoDuration = cloudBrainTwoDuration + cloudbrain.Cloudbrain.Duration
}
- if cloudbrain.Cloudbrain.Type == models.TypeIntelligentNet {
+ if cloudbrain.Cloudbrain.Type == models.TypeC2Net {
intelligentNetDuration = intelligentNetDuration + cloudbrain.Cloudbrain.Duration
}
@@ -540,7 +540,7 @@ func GetAllCloudbrainsPeriodDistribution(ctx *context.Context) {
cloudTwoJobTypeRes[cloudbrain.JobType] += 1
}
}
- if cloudbrain.Cloudbrain.Type == models.TypeIntelligentNet {
+ if cloudbrain.Cloudbrain.Type == models.TypeC2Net {
if _, ok := intelligentNetJobTypeRes[cloudbrain.JobType]; !ok {
intelligentNetJobTypeRes[cloudbrain.JobType] = 1
} else {
@@ -1287,7 +1287,7 @@ func getCloudbrainType(rs *models.CloudbrainInfo, ctx *context.Context) string {
return ctx.Tr("repo.cloudbrain1")
} else if rs.Cloudbrain.Type == models.TypeCloudBrainTwo {
return ctx.Tr("repo.cloudbrain2")
- } else if rs.Cloudbrain.Type == models.TypeIntelligentNet {
+ } else if rs.Cloudbrain.Type == models.TypeC2Net {
return ctx.Tr("repo.intelligent_net")
} else {
return ctx.Tr("repo.cloudbrain_untype")
diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go
index c14976282..2a0ce19db 100755
--- a/routers/api/v1/repo/modelarts.go
+++ b/routers/api/v1/repo/modelarts.go
@@ -6,6 +6,8 @@
package repo
import (
+ "code.gitea.io/gitea/modules/grampus"
+ "encoding/json"
"net/http"
"strconv"
"strings"
@@ -125,7 +127,8 @@ func GetModelArtsTrainJob(ctx *context.APIContext) {
func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
var (
- err error
+ err error
+ aiCenterName string
)
jobID := ctx.Params(":jobid")
@@ -167,7 +170,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
log.Error("UpdateJob failed:", err)
}
}
- } else {
+ } else if job.Type == models.TypeCloudBrainTwo {
result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
if err != nil {
ctx.NotFound(err)
@@ -189,12 +192,50 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
if err != nil {
log.Error("UpdateJob failed:", err)
}
+ } else if job.Type == models.TypeC2Net {
+ result, err := grampus.GetJob(jobID)
+ if err != nil {
+ log.Error("GetJob(%s) failed:%v", job.JobName, err)
+ ctx.NotFound(err)
+ return
+ }
+
+ if job.StartTime == 0 && result.JobInfo.StartedAt > 0 {
+ job.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
+ }
+ job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
+ job.Duration = result.JobInfo.RunSec
+ job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
+
+ if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
+ job.EndTime = job.StartTime.Add(job.Duration)
+ }
+ job.CorrectCreateUnix()
+
+ if len(job.AiCenter) == 0 {
+ if len(result.JobInfo.Tasks) > 0 {
+ if len(result.JobInfo.Tasks[0].CenterID) > 0 && len(result.JobInfo.Tasks[0].CenterName) > 0 {
+ job.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
+ aiCenterName = result.JobInfo.Tasks[0].CenterName[0]
+ }
+ }
+ } else {
+ temp := strings.Split(job.AiCenter, "+")
+ if len(temp) > 1 {
+ aiCenterName = temp[1]
+ }
+ }
+ err = models.UpdateTrainJobVersion(job)
+ if err != nil {
+ log.Error("UpdateJob failed:", err)
+ }
}
ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,
"JobStatus": job.Status,
"JobDuration": job.TrainJobDuration,
+ "AiCenter": aiCenterName,
})
}
@@ -373,11 +414,29 @@ func ModelList(ctx *context.APIContext) {
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
return
}
- models, err := storage.GetObsListObject(task.JobName, "output/", parentDir, versionName)
- if err != nil {
- log.Info("get TrainJobListModel failed:", err)
- ctx.ServerError("GetObsListObject:", err)
- return
+
+ var fileInfos []storage.FileInfo
+ if task.ComputeResource == models.NPUResource {
+ fileInfos, err = storage.GetObsListObject(task.JobName, "output/", parentDir, versionName)
+ if err != nil {
+ log.Info("get TrainJobListModel failed:", err)
+ ctx.ServerError("GetObsListObject:", err)
+ return
+ }
+ } else if task.ComputeResource == models.GPUResource {
+ files, err := routerRepo.GetModelDirs(task.JobName, parentDir)
+ if err != nil {
+ log.Info("GetModelDirs failed:", err)
+ ctx.ServerError("GetModelDirs:", err)
+ return
+ }
+
+ err = json.Unmarshal([]byte(files), &fileInfos)
+ if err != nil {
+ log.Error("json.Unmarshal failed:%v", err.Error(), ctx.Data["msgID"])
+ ctx.ServerError("json.Unmarshal failed:", err)
+ return
+ }
}
ctx.JSON(http.StatusOK, map[string]interface{}{
@@ -385,7 +444,7 @@ func ModelList(ctx *context.APIContext) {
"VersionName": versionName,
"StatusOK": 0,
"Path": dirArray,
- "Dirs": models,
+ "Dirs": fileInfos,
"task": task,
"PageIsCloudBrain": true,
})
diff --git a/routers/api/v1/repo/repo_dashbord.go b/routers/api/v1/repo/repo_dashbord.go
index b19c93371..b3a01cff1 100644
--- a/routers/api/v1/repo/repo_dashbord.go
+++ b/routers/api/v1/repo/repo_dashbord.go
@@ -887,19 +887,12 @@ func getTimePeroid(ctx *context.Context, recordBeginTime time.Time) (time.Time,
if queryType == "all" {
beginTime = recordBeginTimeTemp
endTime = now
- } else if queryType == "today" {
+ } else if queryType == "yesterday" {
endTime = now
beginTime = time.Date(endTime.Year(), endTime.Month(), endTime.Day(), 0, 0, 0, 0, now.Location())
- } else if queryType == "yesterday" {
- endTime = time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, now.Location())
- beginTime = endTime.AddDate(0, 0, -1)
- } else if queryType == "last_7day" {
- beginTime = now.AddDate(0, 0, -7)
- beginTime = time.Date(beginTime.Year(), beginTime.Month(), beginTime.Day(), 0, 0, 0, 0, now.Location())
- endTime = now
- } else if queryType == "last_30day" {
- beginTime = now.AddDate(0, 0, -30)
+ } else if queryType == "current_week" {
+ beginTime = now.AddDate(0, 0, -int(time.Now().Weekday())+2) //begin from monday
beginTime = time.Date(beginTime.Year(), beginTime.Month(), beginTime.Day(), 0, 0, 0, 0, now.Location())
endTime = now
} else if queryType == "current_month" {
diff --git a/routers/authentication/wechat.go b/routers/authentication/wechat.go
index 152348125..1337ed3d4 100644
--- a/routers/authentication/wechat.go
+++ b/routers/authentication/wechat.go
@@ -8,9 +8,11 @@ import (
"code.gitea.io/gitea/modules/redis/redis_client"
"code.gitea.io/gitea/modules/redis/redis_key"
"code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/routers/response"
"encoding/json"
"errors"
gouuid "github.com/satori/go.uuid"
+ "strconv"
"time"
)
@@ -125,3 +127,23 @@ func createQRCode4Bind(userId int64) (*QRCodeResponse, error) {
}
return result, nil
}
+
+// GetMaterial responds with the WeChat materials selected by the "type",
+// "offset" and "count" query parameters, wrapped in a success response.
+// offset defaults to 0 and count to 20; a missing, malformed or out-of-range
+// value falls back to its default instead of silently becoming zero.
+func GetMaterial(ctx *context.Context) {
+	const (
+		defaultOffset = 0
+		defaultCount  = 20
+	)
+	offset, count := defaultOffset, defaultCount
+	if v, err := strconv.Atoi(ctx.Query("offset")); err == nil && v >= 0 {
+		offset = v
+	}
+	if v, err := strconv.Atoi(ctx.Query("count")); err == nil && v > 0 {
+		count = v
+	}
+	r := wechat.GetWechatMaterial(ctx.Query("type"), offset, count)
+	ctx.JSON(200, response.SuccessWithData(r))
+}
diff --git a/routers/authentication/wechat_event.go b/routers/authentication/wechat_event.go
index 9b1cebec6..887bfba0d 100644
--- a/routers/authentication/wechat_event.go
+++ b/routers/authentication/wechat_event.go
@@ -14,24 +14,48 @@ import (
// https://developers.weixin.qq.com/doc/offiaccount/Message_Management/Passive_user_reply_message.html
func AcceptWechatEvent(ctx *context.Context) {
b, _ := ioutil.ReadAll(ctx.Req.Request.Body)
- we := wechat.WechatEvent{}
+ we := wechat.WechatMsg{}
xml.Unmarshal(b, &we)
-
+ switch we.MsgType {
+ case wechat.WECHAT_MSG_TYPE_EVENT:
+ HandleEventMsg(ctx, we)
+ case wechat.WECHAT_MSG_TYPE_TEXT:
+ HandleTextMsg(ctx, we)
+ }
log.Info("accept wechat event= %+v", we)
- var replyStr string
- switch we.Event {
- case wechat.WECHAT_EVENT_SUBSCRIBE, wechat.WECHAT_EVENT_SCAN:
- replyStr = wechat.HandleSubscribeEvent(we)
- break
+
+}
+
+// ValidEventSource answers a source-validation request by writing the
+// "echostr" query parameter back unchanged as the response body.
+func ValidEventSource(ctx *context.Context) {
+	challenge := []byte(ctx.Query("echostr"))
+	ctx.Write(challenge)
+}
+
+// HandleEventMsg dispatches an event message: scan events, and subscribe
+// events carrying an EventKey, go to HandleEventScan; subscribe events without
+// an EventKey go to HandleEventSubscribe. Any other event is ignored here.
+func HandleEventMsg(ctx *context.Context, msg wechat.WechatMsg) {
+	isScan := msg.Event == wechat.WECHAT_EVENT_SCAN
+	isSubscribe := msg.Event == wechat.WECHAT_EVENT_SUBSCRIBE
+	switch {
+	case isScan, isSubscribe && msg.EventKey != "":
+		HandleEventScan(ctx, msg)
+	case isSubscribe:
+		HandleEventSubscribe(ctx, msg)
+	}
+}
+func HandleEventScan(ctx *context.Context, msg wechat.WechatMsg) {
+ replyStr := wechat.HandleScanEvent(msg)
if replyStr == "" {
log.Info("reply str is empty")
return
}
- reply := &wechat.EventReply{
- ToUserName: we.FromUserName,
- FromUserName: we.ToUserName,
+ reply := &wechat.MsgReply{
+ ToUserName: msg.FromUserName,
+ FromUserName: msg.ToUserName,
CreateTime: time.Now().Unix(),
MsgType: wechat.WECHAT_MSG_TYPE_TEXT,
Content: replyStr,
@@ -39,9 +63,99 @@ func AcceptWechatEvent(ctx *context.Context) {
ctx.XML(200, reply)
}
-// ValidEventSource
-func ValidEventSource(ctx *context.Context) {
- echostr := ctx.Query("echostr")
- ctx.Write([]byte(echostr))
- return
+// HandleEventSubscribe replies to a subscribe event with the content returned
+// by wechat.HandleSubscribeEvent; a nil result sends no reply at all.
+func HandleEventSubscribe(ctx *context.Context, msg wechat.WechatMsg) {
+	content := wechat.HandleSubscribeEvent(msg)
+	if content != nil {
+		ctx.XML(200, buildReplyContent(msg, content))
+	}
+}
+
+// HandleTextMsg answers a text message with the reply wechat.GetAutomaticReply
+// returns for its content; when that is nil it only logs and sends nothing.
+func HandleTextMsg(ctx *context.Context, msg wechat.WechatMsg) {
+	if content := wechat.GetAutomaticReply(msg.Content); content != nil {
+		ctx.XML(200, buildReplyContent(msg, content))
+		return
+	}
+	log.Info("TextMsg reply is empty")
+}
+
+// buildReplyContent converts r into the reply struct matching r.ReplyType
+// (text, image, voice, video, music or news), addressed back to the sender of
+// msg and stamped with the current Unix time. Any other reply type yields a
+// bare wechat.MsgReply that carries only the envelope fields.
+func buildReplyContent(msg wechat.WechatMsg, r *wechat.WechatReplyContent) interface{} {
+	to, from := msg.FromUserName, msg.ToUserName
+	now := time.Now().Unix()
+	kind := r.ReplyType
+
+	switch kind {
+	case wechat.ReplyTypeText:
+		return &wechat.TextMsgReply{
+			ToUserName:   to,
+			FromUserName: from,
+			CreateTime:   now,
+			MsgType:      kind,
+			Content:      r.Reply.Content,
+		}
+	case wechat.ReplyTypeImage:
+		return &wechat.ImageMsgReply{
+			ToUserName:   to,
+			FromUserName: from,
+			CreateTime:   now,
+			MsgType:      kind,
+			Image: wechat.ImageContent{
+				MediaId: r.Reply.MediaId,
+			},
+		}
+	case wechat.ReplyTypeVoice:
+		return &wechat.VoiceMsgReply{
+			ToUserName:   to,
+			FromUserName: from,
+			CreateTime:   now,
+			MsgType:      kind,
+			Voice: wechat.VoiceContent{
+				MediaId: r.Reply.MediaId,
+			},
+		}
+	case wechat.ReplyTypeVideo:
+		return &wechat.VideoMsgReply{
+			ToUserName:   to,
+			FromUserName: from,
+			CreateTime:   now,
+			MsgType:      kind,
+			Video: wechat.VideoContent{
+				MediaId:     r.Reply.MediaId,
+				Title:       r.Reply.Title,
+				Description: r.Reply.Description,
+			},
+		}
+	case wechat.ReplyTypeMusic:
+		return &wechat.MusicMsgReply{
+			ToUserName:   to,
+			FromUserName: from,
+			CreateTime:   now,
+			MsgType:      kind,
+			Music: wechat.MusicContent{
+				Title:        r.Reply.Title,
+				Description:  r.Reply.Description,
+				MusicUrl:     r.Reply.MusicUrl,
+				HQMusicUrl:   r.Reply.HQMusicUrl,
+				ThumbMediaId: r.Reply.ThumbMediaId,
+			},
+		}
+	case wechat.ReplyTypeNews:
+		return &wechat.NewsMsgReply{
+			ToUserName:   to,
+			FromUserName: from,
+			CreateTime:   now,
+			MsgType:      kind,
+			ArticleCount: len(r.Reply.Articles),
+			Articles:     wechat.ArticleItem{Item: r.Reply.Articles},
+		}
+	}
+	// Unrecognized reply type: envelope only, no payload.
+	return &wechat.MsgReply{ToUserName: to, FromUserName: from, CreateTime: now, MsgType: kind}
}
diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go
index a075f3b70..eb284e2f6 100755
--- a/routers/repo/cloudbrain.go
+++ b/routers/repo/cloudbrain.go
@@ -3,6 +3,7 @@ package repo
import (
"bufio"
"code.gitea.io/gitea/services/reward/point/account"
+ "code.gitea.io/gitea/modules/grampus"
"encoding/json"
"errors"
"fmt"
@@ -187,7 +188,7 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
ctx.Data["brainscore_path"] = cloudbrain.BrainScoreMountPath
ctx.Data["is_brainscore_enabled"] = setting.IsBrainScoreEnabled
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainOne
+ ctx.Data["datasetType"] = models.TypeCloudBrainOne
ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode")
@@ -1049,6 +1050,7 @@ func GetPublicImages(ctx *context.Context) {
IncludeOfficialOnly: ctx.QueryBool("recommend"),
SearchOrderBy: "type desc, num_stars desc,id desc",
Status: models.IMAGE_STATUS_SUCCESS,
+ CloudbrainType: ctx.QueryInt("cloudbrainType"),
}
getImages(ctx, &opts)
@@ -1484,7 +1486,34 @@ func SyncCloudbrainStatus() {
} else {
log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType)
}
+ } else if task.Type == models.TypeC2Net {
+ result, err := grampus.GetJob(task.JobID)
+ if err != nil {
+ log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
+ continue
+ }
+
+ if result != nil {
+ if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 {
+ task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
+ }
+ task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
+ task.Duration = result.JobInfo.RunSec
+ task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
+ if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
+ task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
+ }
+ if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
+ task.EndTime = task.StartTime.Add(task.Duration)
+ }
+ task.CorrectCreateUnix()
+ err = models.UpdateJob(task)
+ if err != nil {
+ log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
+ continue
+ }
+ }
} else {
log.Error("task.Type(%s) is error:%d", task.JobName, task.Type)
}
diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go
new file mode 100755
index 000000000..52f803d1d
--- /dev/null
+++ b/routers/repo/grampus.go
@@ -0,0 +1,786 @@
+package repo
+
+import (
+ "code.gitea.io/gitea/modules/auth"
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/grampus"
+ "code.gitea.io/gitea/modules/modelarts"
+ "code.gitea.io/gitea/modules/timeutil"
+ "code.gitea.io/gitea/modules/util"
+ "encoding/json"
+ "errors"
+ "github.com/unknwon/com"
+ "io/ioutil"
+ "net/http"
+ "os"
+ "path"
+ "strconv"
+ "strings"
+ "time"
+
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/base"
+ "code.gitea.io/gitea/modules/cloudbrain"
+ "code.gitea.io/gitea/modules/context"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+)
+
+const (
+	// Template "repo/grampus/trainjob/show"; not referenced in the visible part of this file.
+	tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show"
+
+	// GPU: rendered by GrampusTrainJobGPUNew and re-rendered when GPU job creation fails.
+	tplGrampusTrainJobGPUNew base.TplName = "repo/grampus/trainjob/gpu/new"
+	// NPU: rendered by GrampusTrainJobNPUNew and re-rendered when NPU job creation fails.
+	tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new"
+)
+
+// GrampusTrainJobGPUNew renders the form for creating a grampus GPU train job.
+func GrampusTrainJobGPUNew(ctx *context.Context) {
+	ctx.Data["datasetType"] = models.TypeCloudBrainOne
+	if err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU); err != nil {
+		ctx.ServerError("get new train-job info failed", err)
+		return
+	}
+	ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUNew)
+}
+
+// GrampusTrainJobNPUNew renders the form for creating a grampus NPU train job.
+func GrampusTrainJobNPUNew(ctx *context.Context) {
+	ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+	if err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU); err != nil {
+		ctx.ServerError("get new train-job info failed", err)
+		return
+	}
+	ctx.HTML(http.StatusOK, tplGrampusTrainJobNPUNew)
+}
+
+// grampusTrainJobNewDataPrepare fills ctx.Data with what the "new train job"
+// form for processType needs: a suggested display job name, the available
+// images and resource specs, the repository branches, the dataset type and the
+// GPUEnabled/NPUEnabled flags. Lookup failures are logged and the matching key
+// is left unset (best effort), so the returned error is currently always nil.
+func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error {
+	ctx.Data["PageIsCloudBrain"] = true
+
+	t := time.Now()
+	var displayJobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
+	ctx.Data["display_job_name"] = displayJobName
+
+	if images, err := grampus.GetImages(processType); err != nil {
+		log.Error("GetImages failed: %v", err)
+	} else {
+		ctx.Data["images"] = images.Infos
+	}
+
+	// Both resource kinds start enabled; an exclusive pool disables its kind
+	// for users who are not members of the organization named by the pool.
+	grampus.InitSpecialPool()
+	ctx.Data["GPUEnabled"] = true
+	ctx.Data["NPUEnabled"] = true
+	if grampus.SpecialPools != nil {
+		for _, pool := range grampus.SpecialPools.Pools {
+			if !pool.IsExclusive {
+				continue
+			}
+			org, _ := models.GetOrgByName(pool.Org)
+			if org == nil {
+				continue
+			}
+			if isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID); !isOrgMember {
+				ctx.Data[pool.Type+"Enabled"] = false
+			}
+		}
+	}
+
+	if specs, err := grampus.GetResourceSpecs(processType); err != nil {
+		log.Error("GetResourceSpecs failed: %v", err)
+	} else {
+		ctx.Data["flavor_infos"] = specs.Infos
+	}
+
+	// Branches come from the git repository attached to the request context.
+	if branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0); err != nil {
+		log.Error("GetBranches error: %v", err)
+	} else {
+		ctx.Data["branches"] = branches
+	}
+	ctx.Data["branchName"] = ctx.Repo.BranchName
+
+	switch processType {
+	case grampus.ProcessorTypeGPU:
+		ctx.Data["datasetType"] = models.TypeCloudBrainOne
+	case grampus.ProcessorTypeNPU:
+		ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+	}
+	return nil
+}
+
+// grampusParamCheckCreateTrainJob validates the create form: the boot file (spaces trimmed)
+// must end in ".py" and the branch name must be non-empty. It returns the first violation or nil.
+func grampusParamCheckCreateTrainJob(form auth.CreateGrampusTrainJobForm) error {
+	if !strings.HasSuffix(strings.TrimSpace(form.BootFile), ".py") {
+		log.Error("the boot file(%s) must be a python file", form.BootFile)
+		return errors.New("启动文件必须是python文件")
+	}
+	if form.BranchName == "" {
+		log.Error("the branch must not be null!")
+		return errors.New("代码分支不能为空!")
+	}
+	return nil
+}
+
+// GrampusTrainJobGpuCreate handles submission of the GPU train-job form: it
+// validates the request, uploads the repository code and an initial model
+// directory to minio, then creates the job through grampus.GenerateTrainJob.
+// Every failure re-renders the form with an error; success redirects to the list.
+func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) {
+	displayJobName := form.DisplayJobName
+	jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
+	uuid := form.Attachment
+	description := form.Description
+	bootFile := strings.TrimSpace(form.BootFile)
+	params := form.Params
+	repo := ctx.Repo.Repository
+	codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/"
+	codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/"
+	branchName := form.BranchName
+	flavorName := form.FlavorName
+	image := strings.TrimSpace(form.Image)
+
+	if !jobNamePattern.MatchString(displayJobName) {
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+	errStr := checkSpecialPool(ctx, "GPU")
+	if errStr != "" {
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+
+	//check count limit
+	count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource)
+	if err != nil {
+		log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form)
+		return
+	} else {
+		if count >= 1 {
+			log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
+			grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+			ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form)
+			return
+		}
+	}
+
+	//check param
+	if err := grampusParamCheckCreateTrainJob(form); err != nil {
+		log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+
+	//check whether the task name in the project is duplicated
+	tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeTrain), displayJobName)
+	if err == nil {
+		if len(tasks) != 0 {
+			log.Error("the job name did already exist", ctx.Data["MsgID"])
+			grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+			ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form)
+			return
+		}
+	} else {
+		if !models.IsErrJobNotExist(err) {
+			log.Error("system error, %v", err, ctx.Data["MsgID"])
+			grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+			ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form)
+			return
+		}
+	}
+
+	//check dataset
+	attachment, err := models.GetAttachmentByUUID(uuid)
+	if err == nil && len(uuid) < 2 {
+		err = errors.New("dataset uuid is too short")
+	}
+	if err != nil {
+		log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+	// Built only after the uuid is validated: slicing a shorter uuid would panic.
+	dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid
+
+	//prepare code and out path
+	_, err = ioutil.ReadDir(codeLocalPath)
+	if err == nil {
+		os.RemoveAll(codeLocalPath)
+	}
+	if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
+		log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+	//todo: upload code (send to file_server todo this work?)
+	//upload code
+	if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
+		log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+	modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/"
+	if err := mkModelPath(modelPath); err != nil {
+		log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+	//init model readme
+	if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
+		log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+
+	//prepare command
+	command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name)
+	if err != nil {
+		log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+	commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)
+	req := &grampus.GenerateTrainJobReq{
+		JobName:          jobName,
+		DisplayJobName:   displayJobName,
+		ComputeResource:  models.GPUResource,
+		Command:          command,
+		ResourceSpecId:   form.FlavorID,
+		ImageUrl:         image,
+		Description:      description,
+		BootFile:         bootFile,
+		Uuid:             uuid,
+		CommitID:         commitID,
+		BranchName:       branchName,
+		Params:           form.Params,
+		FlavorName:       flavorName,
+		EngineName:       image,
+		DatasetName:      attachment.Name,
+		IsLatestVersion:  modelarts.IsLatestVersion,
+		VersionCount:     modelarts.VersionCount,
+		WorkServerNumber: 1,
+	}
+	err = grampus.GenerateTrainJob(ctx, req)
+	if err != nil {
+		log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
+}
+
+// checkSpecialPool returns a translated no-permission message when resourceType is served by an
+// exclusive pool whose organization exists and lacks the current user; otherwise it returns "".
+func checkSpecialPool(ctx *context.Context, resourceType string) string {
+	grampus.InitSpecialPool()
+	if grampus.SpecialPools == nil {
+		return ""
+	}
+	for _, p := range grampus.SpecialPools.Pools {
+		if !p.IsExclusive || p.Type != resourceType {
+			continue
+		}
+		org, _ := models.GetOrgByName(p.Org)
+		if org == nil {
+			continue
+		}
+		if member, _ := models.IsOrganizationMember(org.ID, ctx.User.ID); !member {
+			return ctx.Tr("repo.grampus.no_operate_right")
+		}
+	}
+	return ""
+}
+
+// GrampusTrainJobNpuCreate handles submission of the NPU train-job form: it
+// validates the request, uploads the repository code to OBS, then creates the job through
+// grampus.GenerateTrainJob. Failures re-render the NPU form; success redirects to the list.
+func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) {
+	displayJobName := form.DisplayJobName
+	jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
+	uuid := form.Attachment
+	description := form.Description
+	bootFile := strings.TrimSpace(form.BootFile)
+	params := form.Params
+	repo := ctx.Repo.Repository
+	codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
+	codeObsPath := grampus.JobPath + jobName + modelarts.CodePath
+	branchName := form.BranchName
+	isLatestVersion := modelarts.IsLatestVersion
+	flavorName := form.FlavorName
+	versionCount := modelarts.VersionCount
+	engineName := form.EngineName
+
+	if !jobNamePattern.MatchString(displayJobName) {
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+	errStr := checkSpecialPool(ctx, "NPU")
+	if errStr != "" {
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr(errStr, tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+
+	//check count limit
+	count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource)
+	if err != nil {
+		log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
+		return
+	} else {
+		if count >= 1 {
+			log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
+			grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+			ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form)
+			return
+		}
+	}
+
+	//check param
+	if err := grampusParamCheckCreateTrainJob(form); err != nil {
+		log.Error("paramCheckCreateTrainJob failed:(%v)", err)
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+
+	//check whether the task name in the project is duplicated
+	tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeTrain), displayJobName)
+	if err == nil {
+		if len(tasks) != 0 {
+			log.Error("the job name did already exist", ctx.Data["MsgID"])
+			grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+			ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form)
+			return
+		}
+	} else {
+		if !models.IsErrJobNotExist(err) {
+			log.Error("system error, %v", err, ctx.Data["MsgID"])
+			grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+			ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
+			return
+		}
+	}
+
+	//check dataset
+	attachment, err := models.GetAttachmentByUUID(uuid)
+	if err == nil && len(uuid) < 2 {
+		err = errors.New("dataset uuid is too short")
+	}
+	if err != nil {
+		log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+	// Built only after the uuid is validated: slicing a shorter uuid would panic.
+	dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
+
+	//prepare code and out path
+	_, err = ioutil.ReadDir(codeLocalPath)
+	if err == nil {
+		os.RemoveAll(codeLocalPath)
+	}
+	if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
+		log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr("Create task failed, server timed out", tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+	//todo: upload code (send to file_server todo this work?)
+	if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
+		log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr("Failed to obsMkdir_output", tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+	if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
+		log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr("Failed to uploadCodeToObs", tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+
+	//prepare command
+	command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+attachment.Name, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name)
+	if err != nil {
+		log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+	commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)
+	req := &grampus.GenerateTrainJobReq{
+		JobName:           jobName,
+		DisplayJobName:    displayJobName,
+		ComputeResource:   models.NPUResource,
+		Command:           command,
+		ResourceSpecId:    form.FlavorID,
+		ImageId:           form.ImageID,
+		DataUrl:           dataObsPath,
+		Description:       description,
+		CodeObsPath:       codeObsPath,
+		BootFileUrl:       codeObsPath + bootFile,
+		BootFile:          bootFile,
+		WorkServerNumber:  form.WorkServerNumber,
+		Uuid:              uuid,
+		CommitID:          commitID,
+		IsLatestVersion:   isLatestVersion,
+		BranchName:        branchName,
+		Params:            form.Params,
+		FlavorName:        flavorName,
+		EngineName:        engineName,
+		VersionCount:      versionCount,
+		TotalVersionCount: modelarts.TotalVersionCount,
+		DatasetName:       attachment.Name,
+	}
+	err = grampus.GenerateTrainJob(ctx, req)
+	if err != nil {
+		log.Error("GenerateTrainJob failed:%v", err.Error())
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
+}
+
+ // GrampusStopJob asks the Grampus service to stop the train job attached to
+ // the request context (ctx.Cloudbrain) and, on success, persists the stopped
+ // status, end time and duration of the task.
+ //
+ // The handler always answers with HTTP 200 and a JSON object; failures are
+ // reported through "result_code" (anything other than "0") and "error_msg".
+ func GrampusStopJob(ctx *context.Context) {
+ 	var ID = ctx.Params(":jobid")
+ 	var resultCode = "0"
+ 	var errorMsg = ""
+ 	var status = ""
+
+ 	task := ctx.Cloudbrain
+ 	// Single-pass loop: every failure path breaks out to the shared JSON
+ 	// response below.
+ 	for {
+ 		if task.Status == string(models.GrampusStatusStopped) || task.Status == string(models.GrampusStatusFailed) || task.Status == string(models.GrampusStatusSucceeded) {
+ 			log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"])
+ 			resultCode = "-1"
+ 			errorMsg = "system error"
+ 			break
+ 		}
+
+ 		res, err := grampus.StopJob(task.JobID)
+ 		if err != nil {
+ 			log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"])
+ 			// Guard against a nil response, and against a response that
+ 			// carries no error code: reporting "0" here would tell the
+ 			// client that the stop succeeded.
+ 			if res != nil && res.ErrorCode != 0 {
+ 				resultCode = strconv.Itoa(res.ErrorCode)
+ 				errorMsg = res.ErrorMsg
+ 			} else {
+ 				resultCode = "-1"
+ 				errorMsg = "system error"
+ 			}
+ 			break
+ 		}
+
+ 		task.Status = string(models.GrampusStatusStopped)
+ 		if task.EndTime == 0 {
+ 			task.EndTime = timeutil.TimeStampNow()
+ 		}
+ 		task.ComputeAndSetDuration()
+ 		err = models.UpdateJob(task)
+ 		if err != nil {
+ 			log.Error("UpdateJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"])
+ 			resultCode = "-1"
+ 			errorMsg = "system error"
+ 			break
+ 		}
+
+ 		status = task.Status
+ 		break
+ 	}
+
+ 	ctx.JSON(200, map[string]interface{}{
+ 		"result_code": resultCode,
+ 		"error_msg":   errorMsg,
+ 		"status":      status,
+ 		"id":          ID,
+ 		"StatusOK":    0,
+ 	})
+ }
+
+func GrampusTrainJobDel(ctx *context.Context) {
+ var listType = ctx.Query("listType")
+ if err := deleteGrampusJob(ctx); err != nil {
+ log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"])
+ ctx.ServerError(err.Error(), err)
+ return
+ }
+
+ var isAdminPage = ctx.Query("isadminpage")
+ var isHomePage = ctx.Query("ishomepage")
+ if ctx.IsUserSiteAdmin() && isAdminPage == "true" {
+ ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains")
+ } else if isHomePage == "true" {
+ ctx.Redirect(setting.AppSubURL + "/cloudbrains")
+ } else {
+ ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
+ }
+}
+
+ // deleteGrampusJob removes the task attached to the request context and its
+ // job storage. A task whose status is not stopped, succeeded or failed is
+ // rejected with an error.
+ func deleteGrampusJob(ctx *context.Context) error {
+ 	job := ctx.Cloudbrain
+
+ 	finished := job.Status == string(models.GrampusStatusStopped) ||
+ 		job.Status == string(models.GrampusStatusSucceeded) ||
+ 		job.Status == string(models.GrampusStatusFailed)
+ 	if !finished {
+ 		log.Error("the job(%s) has not been stopped", job.JobName, ctx.Data["msgID"])
+ 		return errors.New("the job has not been stopped")
+ 	}
+
+ 	if err := models.DeleteJob(job); err != nil {
+ 		log.Error("DeleteJob failed: %v", err, ctx.Data["msgID"])
+ 		return err
+ 	}
+
+ 	// The storage type passed to deleteJobStorage follows the compute resource.
+ 	storageType := models.TypeCloudBrainOne
+ 	if job.ComputeResource == models.NPUResource {
+ 		storageType = models.TypeCloudBrainTwo
+ 	}
+ 	deleteJobStorage(job.JobName, storageType)
+
+ 	return nil
+ }
+
+ // GrampusTrainJobShow renders the detail page of a Grampus train job.
+ //
+ // For a record that has not been deleted, it queries the Grampus service and
+ // refreshes the task's AI center, status, duration and start/end times. A
+ // failure of that query is logged and the page is rendered from the stored
+ // record. The stored parameter JSON is flattened to "label = value; ..." for
+ // display.
+ func GrampusTrainJobShow(ctx *context.Context) {
+ 	ctx.Data["PageIsCloudBrain"] = true
+
+ 	task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid"))
+ 	if err != nil {
+ 		log.Error("GetCloudbrainByJobID failed:" + err.Error())
+ 		ctx.ServerError("system error", err)
+ 		return
+ 	}
+
+ 	if task.DeletedAt.IsZero() { // normal (not deleted) record
+ 		result, err := grampus.GetJob(task.JobID)
+ 		if err != nil {
+ 			// Best effort: do not trust the response of a failed query.
+ 			log.Error("GetJob failed:" + err.Error())
+ 		} else if result != nil {
+ 			// The task list may be empty; only index it after checking.
+ 			if tasks := result.JobInfo.Tasks; len(tasks) > 0 && len(tasks[0].CenterID) == 1 && len(tasks[0].CenterName) == 1 {
+ 				task.AiCenter = tasks[0].CenterID[0] + "+" + tasks[0].CenterName[0]
+ 			}
+
+ 			// Remember the stored status so the change detection below
+ 			// compares against it rather than the value just assigned.
+ 			oldStatus := task.Status
+ 			task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
+ 			if task.Status != oldStatus || result.JobInfo.Status == models.GrampusStatusRunning {
+ 				task.Duration = result.JobInfo.RunSec
+ 				task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
+
+ 				if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
+ 					task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
+ 				}
+ 				if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
+ 					task.EndTime = task.StartTime.Add(task.Duration)
+ 				}
+ 				task.CorrectCreateUnix()
+ 				if err := models.UpdateJob(task); err != nil {
+ 					log.Error("UpdateJob failed:" + err.Error())
+ 				}
+ 			}
+ 		}
+ 	}
+
+ 	if len(task.Parameters) > 0 {
+ 		var parameters models.Parameters
+ 		if err := json.Unmarshal([]byte(task.Parameters), &parameters); err != nil {
+ 			log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err)
+ 			ctx.ServerError("system error", err)
+ 			return
+ 		}
+
+ 		// An empty parameter list joins to the empty string.
+ 		pairs := make([]string, 0, len(parameters.Parameter))
+ 		for _, p := range parameters.Parameter {
+ 			pairs = append(pairs, p.Label+" = "+p.Value)
+ 		}
+ 		task.Parameters = strings.Join(pairs, "; ")
+ 	}
+
+ 	ctx.Data["version_list_task"] = []*models.Cloudbrain{task}
+
+ 	ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task)
+ 	ctx.Data["displayJobName"] = task.DisplayJobName
+
+ 	// AiCenter is stored as "<center id>+<center name>"; only the name is shown.
+ 	aiCenterInfo := strings.Split(task.AiCenter, "+")
+ 	if len(aiCenterInfo) == 2 {
+ 		ctx.Data["ai_center"] = aiCenterInfo[1]
+ 	}
+
+ 	ctx.HTML(http.StatusOK, tplGrampusTrainJobShow)
+ }
+
+ // GrampusGetLog looks up the job named by the ":jobid" route parameter,
+ // fetches its train log from Grampus and returns the job name together with
+ // the log content as JSON.
+ func GrampusGetLog(ctx *context.Context) {
+ 	task, err := models.GetCloudbrainByJobID(ctx.Params(":jobid"))
+ 	if err != nil {
+ 		log.Error("GetCloudbrainByJobID failed: %v", err, ctx.Data["MsgID"])
+ 		ctx.ServerError(err.Error(), err)
+ 		return
+ 	}
+
+ 	logContent, err := grampus.GetTrainJobLog(task.JobID)
+ 	if err != nil {
+ 		log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"])
+ 		ctx.ServerError(err.Error(), err)
+ 		return
+ 	}
+
+ 	payload := map[string]interface{}{
+ 		"JobName": task.JobName,
+ 		"Content": logContent,
+ 	}
+ 	ctx.JSON(http.StatusOK, payload)
+ }
+
+ // generateCommand assembles the shell command line a Grampus train job runs.
+ //
+ // The command is a ';'-separated sequence that: enters the work directory and
+ // runs the prepare script, downloads the code archive and dataset, unpacks
+ // both, runs bootFile with python, uploads the output directory, and finally
+ // exits with a status derived from the python run.
+ //
+ // processorType selects the work directory and the OBS or MinIO helper
+ // scripts. paramSrc is the JSON-encoded run parameter list; an empty string
+ // means no parameters. An error is returned (with an empty command) only when
+ // paramSrc cannot be decoded.
+ //
+ // NOTE(review): repoName, bootFile, datasetName and the parameter labels and
+ // values are interpolated into the shell command unquoted. If any of them can
+ // carry user-controlled shell metacharacters, they need validation or quoting
+ // before reaching this function.
+ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName string) (string, error) {
+ 	// Appended after the download and unzip steps to check the exit status
+ 	// of the preceding command.
+ 	const checkPrevStatus = "bash -c \"[[ $? -eq 0 ]] && exit 0 || exit -1;\";"
+
+ 	// Decode the run parameters first so no work is done on invalid input.
+ 	var paramCode strings.Builder
+ 	if len(paramSrc) != 0 {
+ 		var parameters models.Parameters
+ 		if err := json.Unmarshal([]byte(paramSrc), &parameters); err != nil {
+ 			log.Error("Failed to Unmarshal params: %s (%v)", paramSrc, err)
+ 			return "", err
+ 		}
+ 		for _, parameter := range parameters.Parameter {
+ 			paramCode.WriteString(" --" + parameter.Label + "=" + parameter.Value)
+ 		}
+ 	}
+
+ 	workDir := grampus.NpuWorkDir
+ 	if processorType == grampus.ProcessorTypeGPU {
+ 		workDir = grampus.GpuWorkDir
+ 	}
+
+ 	var command strings.Builder
+ 	command.WriteString("pwd;cd " + workDir + grampus.CommandPrepareScript)
+
+ 	// download code & dataset
+ 	switch processorType {
+ 	case grampus.ProcessorTypeNPU:
+ 		command.WriteString("./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " " + datasetName + ";")
+ 	case grampus.ProcessorTypeGPU:
+ 		command.WriteString("./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " " + datasetName + ";")
+ 	}
+
+ 	// check download result
+ 	command.WriteString(checkPrevStatus)
+
+ 	// unzip code & dataset; a ".tar.gz" dataset is unpacked with tar instead.
+ 	// NOTE(review): the code archive is unpacked as "master.zip" while the
+ 	// download step stores it as grampus.CodeArchiveName - confirm they match.
+ 	toolUnzip := "unzip -q "
+ 	if strings.HasSuffix(datasetName, ".tar.gz") {
+ 		toolUnzip = "tar -zxvf "
+ 	}
+ 	command.WriteString("cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + ";")
+
+ 	// check unzip result
+ 	command.WriteString(checkPrevStatus)
+
+ 	command.WriteString("echo \"unzip finished;start to exec code;\";")
+
+ 	// exec code
+ 	command.WriteString("cd " + workDir + "code/" + strings.ToLower(repoName) + ";python " + bootFile + paramCode.String() + ";")
+
+ 	// remember the exec result so it survives the upload step
+ 	command.WriteString("result=$?;")
+
+ 	// upload models
+ 	switch processorType {
+ 	case grampus.ProcessorTypeNPU:
+ 		command.WriteString("cd " + workDir + "script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + workDir + "output/;")
+ 	case grampus.ProcessorTypeGPU:
+ 		command.WriteString("cd " + workDir + "script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + workDir + "output/;")
+ 	}
+
+ 	// check exec result
+ 	command.WriteString("bash -c \"[[ $result -eq 0 ]] && exit 0 || exit -1;\"")
+
+ 	return command.String(), nil
+ }
+
+func downloadZipCode(ctx *context.Context, codePath, branchName string) error {
+ archiveType := git.ZIP
+ archivePath := codePath
+
+ if !com.IsDir(archivePath) {
+ if err := os.MkdirAll(archivePath, os.ModePerm); err != nil {
+ log.Error("MkdirAll failed:" + err.Error())
+ return err
+ }
+ }
+
+ // Get corresponding commit.
+ var (
+ commit *git.Commit
+ err error
+ )
+
+ gitRepo := ctx.Repo.GitRepo
+ if err != nil {
+ log.Error("OpenRepository failed:" + err.Error())
+ return err
+ }
+
+ if gitRepo.IsBranchExist(branchName) {
+ commit, err = gitRepo.GetBranchCommit(branchName)
+ if err != nil {
+ log.Error("GetBranchCommit failed:" + err.Error())
+ return err
+ }
+ }
+
+ archivePath = path.Join(archivePath, grampus.CodeArchiveName)
+ if !com.IsFile(archivePath) {
+ if err := commit.CreateArchive(archivePath, git.CreateArchiveOpts{
+ Format: archiveType,
+ Prefix: setting.Repository.PrefixArchiveFiles,
+ }); err != nil {
+ log.Error("CreateArchive failed:" + err.Error())
+ return err
+ }
+ }
+
+ return nil
+}
diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go
index 0b33a6dd7..e679e8a3e 100755
--- a/routers/repo/modelarts.go
+++ b/routers/repo/modelarts.go
@@ -147,7 +147,7 @@ func notebookNewDataPrepare(ctx *context.Context) error {
}
ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
+ ctx.Data["datasetType"] = models.TypeCloudBrainTwo
return nil
}
@@ -573,24 +573,11 @@ func TrainJobIndex(ctx *context.Context) {
}
listType := ctx.Query("listType")
- if len(listType) == 0 {
- listType = models.AllResource
- }
ctx.Data["ListType"] = listType
- typeCloudBrain := models.TypeCloudBrainAll
- if listType == models.GPUResource {
- typeCloudBrain = models.TypeCloudBrainOne
- } else if listType == models.NPUResource {
- typeCloudBrain = models.TypeCloudBrainTwo
- } else if listType == models.AllResource {
- typeCloudBrain = models.TypeCloudBrainAll
+ if listType == models.AllResource {
+ listType = ""
}
- //else {
- // log.Error("listType(%s) error", listType)
- // ctx.ServerError("listType error", errors.New("listType error"))
- // return
- //}
var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
@@ -600,10 +587,11 @@ func TrainJobIndex(ctx *context.Context) {
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
- Type: typeCloudBrain,
JobTypeNot: false,
JobTypes: jobTypes,
IsLatestVersion: modelarts.IsLatestVersion,
+ ComputeResource: listType,
+ Type: models.TypeCloudBrainAll,
})
if err != nil {
ctx.ServerError("Cloudbrain", err)
@@ -613,11 +601,6 @@ func TrainJobIndex(ctx *context.Context) {
for i, task := range tasks {
tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
- if task.Cloudbrain.Type == models.TypeCloudBrainOne {
- tasks[i].ComputeResource = models.GPUResource
- } else if task.Cloudbrain.Type == models.TypeCloudBrainTwo {
- tasks[i].ComputeResource = models.NPUResource
- }
}
pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
@@ -704,7 +687,7 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
return err
}
ctx.Data["config_list"] = configList.ParaConfigs
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
+ ctx.Data["datasetType"] = models.TypeCloudBrainTwo
return nil
}
@@ -778,7 +761,7 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts
ctx.Data["bootFile"] = form.BootFile
ctx.Data["uuid"] = form.Attachment
ctx.Data["branch_name"] = form.BranchName
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
+ ctx.Data["datasetType"] = models.TypeCloudBrainTwo
return nil
}
@@ -872,7 +855,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
ctx.Data["uuid"] = task.Uuid
ctx.Data["flavor_code"] = task.FlavorCode
ctx.Data["engine_id"] = task.EngineID
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
+ ctx.Data["datasetType"] = models.TypeCloudBrainTwo
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
if err != nil {
@@ -969,7 +952,7 @@ func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrai
return err
}
ctx.Data["config_list"] = configList.ParaConfigs
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
+ ctx.Data["datasetType"] = models.TypeCloudBrainTwo
return nil
}
@@ -2136,7 +2119,7 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error {
New: MODEL_LATEST,
})
ctx.Data["MODEL_COUNT"] = model_count
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
+ ctx.Data["datasetType"] = models.TypeCloudBrainTwo
return nil
}
@@ -2202,7 +2185,7 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel
ctx.Data["model_version"] = form.ModelVersion
ctx.Data["ckpt_name"] = form.CkptName
ctx.Data["train_url"] = form.TrainUrl
- ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
+ ctx.Data["datasetType"] = models.TypeCloudBrainTwo
return nil
}
@@ -2272,24 +2255,35 @@ func ModelDownload(ctx *context.Context) {
err error
)
- var jobID = ctx.Params(":jobid")
+ jobID := ctx.Params(":jobid")
versionName := ctx.Query("version_name")
parentDir := ctx.Query("parent_dir")
fileName := ctx.Query("file_name")
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
+ log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", task.JobName, err.Error())
return
}
- path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName, parentDir, fileName), "/")
-
- url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path)
- if err != nil {
- log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
- ctx.ServerError("GetObsCreateSignedUrl", err)
- return
+ var url string
+ if task.ComputeResource == models.NPUResource {
+ path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName, parentDir, fileName), "/")
+ url, err = storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path)
+ if err != nil {
+ log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
+ ctx.ServerError("GetObsCreateSignedUrl", err)
+ return
+ }
+ } else if task.ComputeResource == models.GPUResource {
+ filePath := setting.CBCodePathPrefix + task.JobName + cloudbrain.ModelMountPath + "/" + parentDir
+ url, err = storage.Attachments.PresignedGetURL(filePath, fileName)
+ if err != nil {
+ log.Error("PresignedGetURL failed: %v", err.Error(), ctx.Data["msgID"])
+ ctx.ServerError("PresignedGetURL", err)
+ return
+ }
}
+
ctx.Resp.Header().Set("Cache-Control", "max-age=0")
http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently)
}
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index 6cf87b527..00a820fc9 100755
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -1103,6 +1103,24 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate)
})
}, context.RepoRef())
+ m.Group("/grampus", func() {
+ m.Group("/train-job", func() {
+ m.Group("/:jobid", func() {
+ m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow)
+ m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusStopJob)
+ m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel)
+ m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload)
+ })
+ m.Group("/gpu", func() {
+ m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.GrampusTrainJobGPUNew)
+ m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobGpuCreate)
+ })
+ m.Group("/npu", func() {
+ m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.GrampusTrainJobNPUNew)
+ m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobNpuCreate)
+ })
+ })
+ }, context.RepoRef())
m.Group("/modelmanage", func() {
m.Post("/create_model", reqRepoModelManageWriter, repo.SaveModel)
m.Post("/create_new_model", repo.SaveNewNameModel)
diff --git a/routers/user/home.go b/routers/user/home.go
index 53aff19b8..ab64e707f 100755
--- a/routers/user/home.go
+++ b/routers/user/home.go
@@ -769,12 +769,6 @@ func Cloudbrains(ctx *context.Context) {
if page <= 0 {
page = 1
}
- debugType := models.TypeCloudBrainAll
- if listType == models.GPUResource {
- debugType = models.TypeCloudBrainOne
- } else if listType == models.NPUResource {
- debugType = models.TypeCloudBrainTwo
- }
var jobTypes []string
jobTypeNot := false
@@ -821,7 +815,6 @@ func Cloudbrains(ctx *context.Context) {
},
Keyword: keyword,
UserID: ctxUser.ID,
- Type: debugType,
JobTypeNot: jobTypeNot,
JobStatusNot: jobStatusNot,
JobStatus: jobStatuses,
@@ -829,6 +822,8 @@ func Cloudbrains(ctx *context.Context) {
NeedRepoInfo: true,
IsLatestVersion: modelarts.IsLatestVersion,
RepoIDList: repoIDList,
+ ComputeResource: listType,
+ Type: models.TypeCloudBrainAll,
})
if err != nil {
ctx.ServerError("Get job failed:", err)
diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go
index 6ffa96933..98bcc8a85 100755
--- a/services/socketwrap/clientManager.go
+++ b/services/socketwrap/clientManager.go
@@ -10,7 +10,7 @@ import (
"github.com/elliotchance/orderedmap"
)
-var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}
+var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}
type ClientsManager struct {
Clients *orderedmap.OrderedMap
diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl
index 4c63b167a..347b5658d 100755
--- a/templates/admin/cloudbrain/list.tmpl
+++ b/templates/admin/cloudbrain/list.tmpl
@@ -102,7 +102,7 @@
{{else if eq .JobType "TRAIN"}}
{{.DisplayJobName}}
@@ -204,7 +204,7 @@
{{else}}
{{$.i18n.Tr "repo.stop"}}
@@ -212,7 +212,7 @@