1111
+diff --git a/go.mod b/go.mod index 387a34520..3b83aced9 100755 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( github.com/PuerkitoBio/goquery v1.5.0 github.com/RichardKnop/machinery v1.6.9 github.com/RoaringBitmap/roaring v0.4.23 // indirect + github.com/alecthomas/chroma v0.10.0 github.com/alibabacloud-go/darabonba-openapi v0.1.18 github.com/alibabacloud-go/dysmsapi-20170525/v2 v2.0.9 github.com/alibabacloud-go/tea v1.1.17 @@ -120,8 +121,9 @@ require ( github.com/urfave/cli v1.22.1 github.com/xanzy/go-gitlab v0.31.0 github.com/yohcop/openid-go v1.0.0 - github.com/yuin/goldmark v1.1.30 - github.com/yuin/goldmark-meta v0.0.0-20191126180153-f0638e958b60 + github.com/yuin/goldmark v1.4.13 + github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594 + github.com/yuin/goldmark-meta v1.1.0 golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37 golang.org/x/mod v0.3.0 // indirect golang.org/x/net v0.0.0-20200513185701-a91f0712d120 @@ -138,7 +140,7 @@ require ( gopkg.in/ldap.v3 v3.0.2 gopkg.in/macaron.v1 v1.3.9 // indirect gopkg.in/testfixtures.v2 v2.5.0 - gopkg.in/yaml.v2 v2.2.8 + gopkg.in/yaml.v2 v2.3.0 mvdan.cc/xurls/v2 v2.1.0 strk.kbt.io/projects/go/libravatar v0.0.0-20191008002943-06d1c002b251 xorm.io/builder v0.3.7 diff --git a/go.sum b/go.sum index d55d7af48..6735a1938 100755 --- a/go.sum +++ b/go.sum @@ -76,6 +76,8 @@ github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMx github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68= github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 h1:uSoVVbwJiQipAclBbw+8quDsfcvFjOpI5iCf4p/cqCs= github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7/go.mod h1:6zEj6s6u/ghQa61ZWa/C2Aw3RkjiTBOix7dkqa1VLIs= +github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= +github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/alecthomas/template 
v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.2/go.mod h1:sCavSAvdzOjul4cEqeVtvlSaSScfNsTQ+46HwlTL1hc= @@ -203,6 +205,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c= github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= +github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E= +github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/docker/go-units v0.3.3/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= @@ -804,8 +808,16 @@ github.com/yuin/goldmark v1.1.27 h1:nqDD4MMMQA0lmWq03Z2/myGPYLQoXtmi0rGVs95ntbo= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.30 h1:j4d4Lw3zqZelDhBksEo3BnWg9xhXRQGJPPSL6OApZjI= github.com/yuin/goldmark v1.1.30/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.5/go.mod h1:rmuwmfZ0+bvzB24eSC//bk1R1Zp3hM0OXYv/G2LIilg= +github.com/yuin/goldmark v1.4.6/go.mod h1:rmuwmfZ0+bvzB24eSC//bk1R1Zp3hM0OXYv/G2LIilg= +github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594 h1:yHfZyN55+5dp1wG7wDKv8HQ044moxkyGq12KFFMFDxg= 
+github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594/go.mod h1:U9ihbh+1ZN7fR5Se3daSPoz1CGF9IYtSvWwVQtnzGHU= github.com/yuin/goldmark-meta v0.0.0-20191126180153-f0638e958b60 h1:gZucqLjL1eDzVWrXj4uiWeMbAopJlBR2mKQAsTGdPwo= github.com/yuin/goldmark-meta v0.0.0-20191126180153-f0638e958b60/go.mod h1:i9VhcIHN2PxXMbQrKqXNueok6QNONoPjNMoj9MygVL0= +github.com/yuin/goldmark-meta v1.1.0 h1:pWw+JLHGZe8Rk0EGsMVssiNb/AaPMHfSRszZeUeiOUc= +github.com/yuin/goldmark-meta v1.1.0/go.mod h1:U4spWENafuA7Zyg+Lj5RqK/MF+ovMYtBvXi1lBb2VP0= github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs= github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= @@ -1086,6 +1098,8 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o= diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 5091a8762..62bae29e2 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -8,14 +8,13 @@ import ( "strings" "time" - "code.gitea.io/gitea/modules/util" - "xorm.io/builder" "xorm.io/xorm" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/modules/util" ) type 
CloudbrainStatus string @@ -31,9 +30,9 @@ const ( ) const ( - NPUResource = "NPU" - GPUResource = "CPU/GPU" - AllResource = "all" + NPUResource = "NPU" + GPUResource = "CPU/GPU" + AllResource = "all" //notebook storage category EVSCategory = "EVS" @@ -372,6 +371,9 @@ type CloudbrainsOptions struct { ComputeResource string BeginTimeUnix int64 EndTimeUnix int64 + AiCenter string + NeedDeleteInfo string + Cluster string } type TaskPod struct { @@ -1259,6 +1261,52 @@ type LogFile struct { Name string } +type JobList struct { + JobName string `json:"job_name"` + JobID int64 `json:"job_id"` + VersionID int64 `json:"version_id"` + VersionCount int64 `json:"version_count"` + Description string `json:"job_desc"` + IntStatus int `json:"status"` +} + +type GetTrainJobListResult struct { + ErrorResult + JobTotalCount int `json:"job_total_count"` //查询到的用户创建作业总数 + JobCountLimit int `json:"job_count_limit"` //用户还可以创建训练作业的数量 + Quotas int `json:"quotas"` //训练作业的运行数量上限 + JobList []JobList `json:"jobs"` +} + +type JobVersionList struct { + VersionName string `json:"version_name"` + VersionID int64 `json:"version_id"` + IntStatus int `json:"status"` +} + +type GetTrainJobVersionListResult struct { + ErrorResult + JobID int64 `json:"job_id"` + JobName string `json:"job_name"` + JobDesc string `json:"job_desc"` + VersionCount int64 `json:"version_count"` + JobVersionList []JobVersionList `json:"versions"` +} + +type NotebookList struct { + JobName string `json:"name"` + JobID string `json:"id"` + Status string `json:"status"` +} + +type GetNotebookListResult struct { + TotalCount int64 `json:"total"` //总的记录数量 + CurrentPage int `json:"current"` //当前页数 + TotalPages int `json:"pages"` //总的页数 + Size int `json:"size"` //每一页的数量 + NotebookList []NotebookList `json:"data"` +} + //Grampus type GrampusResult struct { ErrorCode int `json:"errorCode"` @@ -1403,6 +1451,23 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { ) } } + if (opts.AiCenter) != "" { + cond = 
cond.And( + builder.Like{"cloudbrain.ai_center", opts.AiCenter}, + ) + } + if (opts.Cluster) != "" { + if opts.Cluster == "resource_cluster_openi" { + cond = cond.And( + builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}), + ) + } + if opts.Cluster == "resource_cluster_c2net" { + cond = cond.And( + builder.Eq{"cloudbrain.type": TypeC2Net}, + ) + } + } if (opts.IsLatestVersion) != "" { cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"})) @@ -1680,21 +1745,6 @@ func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) { return cloudBrains, err } -func GetWaittingTop() ([]*CloudbrainInfo, error) { - sess := x.NewSession() - defer sess.Close() - var cond = builder.NewCond() - cond = cond.And( - builder.Eq{"cloudbrain.status": string(JobWaiting)}, - ) - sess.OrderBy("cloudbrain.created_unix ASC limit 1") - cloudbrains := make([]*CloudbrainInfo, 0, 1) - if err := sess.Table(&Cloudbrain{}).Where(cond). 
- Find(&cloudbrains); err != nil { - log.Info("find error.") - } - return cloudbrains, nil -} func GetModelartsReDebugTaskByJobId(jobID string) ([]*Cloudbrain, error) { sess := x.NewSession() defer sess.Close() @@ -1959,6 +2009,24 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { } } + if (opts.AiCenter) != "" { + cond = cond.And( + builder.Like{"cloudbrain.ai_center", opts.AiCenter}, + ) + } + if (opts.NeedDeleteInfo) != "" { + if opts.NeedDeleteInfo == "yes" { + cond = cond.And( + builder.And(builder.NotNull{"cloudbrain.deleted_at"}), + ) + } + if opts.NeedDeleteInfo == "no" { + cond = cond.And( + builder.And(builder.IsNull{"cloudbrain.deleted_at"}), + ) + } + } + if (opts.IsLatestVersion) != "" { cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"})) @@ -2266,3 +2334,8 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) { In("id", ids). Find(&cloudbrains) } + +func GetCloudbrainCountByJobName(jobName, jobType string, typeCloudbrain int) (int, error) { + count, err := x.Where("job_name = ? and job_type= ? 
and type = ?", jobName, jobType, typeCloudbrain).Count(new(Cloudbrain)) + return int(count), err +} diff --git a/models/cloudbrain_static.go b/models/cloudbrain_static.go index e3ac5e963..371b30f66 100644 --- a/models/cloudbrain_static.go +++ b/models/cloudbrain_static.go @@ -36,133 +36,6 @@ type TaskDetail struct { FlavorName string `json:"FlavorName"` } -func GetDebugOnePeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and job_type ='" + string(JobTypeDebug) + "'" + - " and type='" + strconv.Itoa(TypeCloudBrainOne) + "'" - - return x.SQL(countSql).Count() -} -func GetDebugOnePeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And job_type = ? And type = ? ", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), JobTypeDebug, TypeCloudBrainOne).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - - return total, nil -} - -func GetTrainOnePeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and job_type ='" + string(JobTypeTrain) + "'" + - " and type='" + strconv.Itoa(TypeCloudBrainOne) + "'" - - return x.SQL(countSql).Count() -} -func GetTrainOnePeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And job_type = ? And type = ? 
", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), JobTypeTrain, TypeCloudBrainOne).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - - return total, nil -} - -func GetBenchmarkOnePeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and job_type ='" + string(JobTypeBenchmark) + "'" + - " and type='" + strconv.Itoa(TypeCloudBrainOne) + "'" - return x.SQL(countSql).Count() -} -func GetBenchmarkOnePeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And job_type = ? And type = ? ", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), JobTypeBenchmark, TypeCloudBrainOne).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - - return total, nil -} -func GetDebugTwoPeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and job_type ='" + string(JobTypeDebug) + "'" + - " and type='" + strconv.Itoa(TypeCloudBrainTwo) + "'" - return x.SQL(countSql).Count() -} -func GetDebugTwoPeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And job_type = ? And type = ? 
", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), JobTypeDebug, TypeCloudBrainTwo).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - return total, nil -} -func GetTrainTwoPeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and job_type ='" + string(JobTypeTrain) + "'" + - " and type='" + strconv.Itoa(TypeCloudBrainTwo) + "'" - return x.SQL(countSql).Count() -} -func GetTrainTwoPeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And job_type = ? And type = ? ", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), JobTypeTrain, TypeCloudBrainTwo).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - return total, nil -} -func GetInferenceTwoPeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and job_type ='" + string(JobTypeInference) + "'" + - " and type='" + strconv.Itoa(TypeCloudBrainTwo) + "'" - return x.SQL(countSql).Count() -} -func GetInferenceTwoPeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And job_type = ? And type = ? 
", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), JobTypeInference, TypeCloudBrainTwo).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - return total, nil -} - -func GetCloudBrainOnePeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and type='" + strconv.Itoa(TypeCloudBrainOne) + "'" - return x.SQL(countSql).Count() -} -func GetCloudBrainOnePeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And type = ? ", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), TypeCloudBrainOne).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - return total, nil -} -func GetCloudBrainTwoPeriodCount(beginTime time.Time, endTime time.Time) (int64, error) { - countSql := "SELECT count(*) FROM " + - "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + - " and created_unix<" + strconv.FormatInt(endTime.Unix(), 10) + - " and type='" + strconv.Itoa(TypeCloudBrainTwo) + "'" - return x.SQL(countSql).Count() -} -func GetCloudBrainTwoPeriodDuration(beginTime time.Time, endTime time.Time) (int64, error) { - total, err := x.Where("created_unix >= ? And created_unix < ? And type = ? 
", strconv.FormatInt(beginTime.Unix(), 10), strconv.FormatInt(endTime.Unix(), 10), TypeCloudBrainTwo).SumInt(&Cloudbrain{}, "duration") - if err != nil { - return 0, err - } - return total, nil -} - func GetTodayCreatorCount(beginTime time.Time, endTime time.Time) (int64, error) { countSql := "SELECT count(distinct user_id) FROM " + "public.cloudbrain where created_unix >=" + strconv.FormatInt(beginTime.Unix(), 10) + @@ -211,6 +84,22 @@ func GetAllStatusCloudBrain() map[string]int { return cloudBrainStatusResult } +func GetWaittingTop() ([]*CloudbrainInfo, error) { + sess := x.NewSession() + defer sess.Close() + var cond = builder.NewCond() + cond = cond.And( + builder.Eq{"cloudbrain.status": string(JobWaiting)}, + ) + sess.OrderBy("cloudbrain.created_unix ASC limit 10") + cloudbrains := make([]*CloudbrainInfo, 0, 10) + if err := sess.Table(&Cloudbrain{}).Where(cond). + Find(&cloudbrains); err != nil { + log.Info("find error.") + } + return cloudbrains, nil +} + func GetRunningTop() ([]*CloudbrainInfo, error) { sess := x.NewSession() defer sess.Close() diff --git a/models/cloudbrain_temp.go b/models/cloudbrain_temp.go new file mode 100755 index 000000000..389ee610a --- /dev/null +++ b/models/cloudbrain_temp.go @@ -0,0 +1,68 @@ +package models + +import ( + "time" + + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/timeutil" +) + +const ( + TempJobId = "TEMP" + TempVersionId = TempJobId + TempJobStatus = TempJobId +) + +type CloudbrainTemp struct { + ID int64 `xorm:"pk autoincr"` + JobID string `xorm:"NOT NULL DEFAULT 'TEMP'"` + VersionID string `xorm:"NOT NULL DEFAULT 'TEMP'"` + JobName string `xorm:"NOT NULL "` + Type int `xorm:"NOT NULL "` + JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"` + Status string `xorm:"INDEX NOT NULL DEFAULT 'TEMP'"` + QueryTimes int `xorm:"INDEX NOT NULL DEFAULT 0"` + CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` + UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` + DeletedAt time.Time 
`xorm:"deleted"` +} + +func InsertCloudbrainTemp(temp *CloudbrainTemp) (err error) { + if _, err = x.Insert(temp); err != nil { + return err + } + + return nil +} + +func getCloudBrainTemp(temp *CloudbrainTemp) (*CloudbrainTemp, error) { + has, err := x.Get(temp) + if err != nil { + return nil, err + } else if !has { + return nil, ErrJobNotExist{} + } + return temp, nil +} + +func GetCloudBrainTempJobs() ([]*CloudbrainTemp, error) { + jobs := make([]*CloudbrainTemp, 0, 10) + return jobs, x.In("status", TempJobStatus, string(ModelArtsStopping), string(ModelArtsTrainJobKilling)). + And("query_times < ?", setting.MaxTempQueryTimes). + Limit(100). + Find(&jobs) +} + +func DeleteCloudbrainTemp(temp *CloudbrainTemp) error { + return deleteCloudbrainTemp(x, temp) +} + +func deleteCloudbrainTemp(e Engine, temp *CloudbrainTemp) error { + _, err := e.ID(temp.ID).Delete(temp) + return err +} + +func UpdateCloudbrainTemp(temp *CloudbrainTemp) error { + _, err := x.ID(temp.ID).AllCols().Update(temp) + return err +} diff --git a/models/models.go b/models/models.go index b714f4650..8898955a7 100755 --- a/models/models.go +++ b/models/models.go @@ -145,6 +145,7 @@ func init() { new(OrgStatistic), new(SearchRecord), new(AiModelConvert), + new(CloudbrainTemp), ) tablesStatistic = append(tablesStatistic, diff --git a/models/repo.go b/models/repo.go index feb6fd3ef..5e11df2b6 100755 --- a/models/repo.go +++ b/models/repo.go @@ -2250,6 +2250,18 @@ func CheckRepoStats(ctx context.Context) error { "UPDATE `repository` SET num_stars=(SELECT COUNT(*) FROM `star` WHERE repo_id=?) WHERE id=?", "repository count 'num_stars'", }, + //Repository.NumIssues + { + "SELECT repo.id FROM `repository` repo WHERE repo.num_issues!=(SELECT COUNT(*) FROM `issue` WHERE repo_id=repo.id AND is_pull=false)", + "UPDATE `repository` SET num_issues=(SELECT COUNT(*) FROM `issue` WHERE repo_id=? 
AND is_pull=false) WHERE id=?", + "repository count 'num_issues'", + }, + //Repository.NumPulls + { + "SELECT repo.id FROM `repository` repo WHERE repo.num_pulls!=(SELECT COUNT(*) FROM `issue` WHERE repo_id=repo.id AND is_pull=true)", + "UPDATE `repository` SET num_pulls=(SELECT COUNT(*) FROM `issue` WHERE repo_id=? AND is_pull=true) WHERE id=?", + "repository count 'num_pulls'", + }, // Label.NumIssues { "SELECT label.id FROM `label` WHERE label.num_issues!=(SELECT COUNT(*) FROM `issue_label` WHERE label_id=label.id)", diff --git a/models/user.go b/models/user.go index a423a843b..a308d9cba 100755 --- a/models/user.go +++ b/models/user.go @@ -1768,7 +1768,6 @@ func (opts *SearchUserOptions) toConds() builder.Cond { if !opts.IsActive.IsNone() { cond = cond.And(builder.Eq{"is_active": opts.IsActive.IsTrue()}) } - return cond } @@ -1780,12 +1779,15 @@ func SearchUsers(opts *SearchUserOptions) (users []*User, _ int64, _ error) { if err != nil { return nil, 0, fmt.Errorf("Count: %v", err) } - + orderby := opts.OrderBy.String() if len(opts.OrderBy) == 0 { - opts.OrderBy = SearchOrderByAlphabetically + orderby = SearchOrderByAlphabetically.String() + lowerKeyword := strings.ToLower(opts.Keyword) + if len(opts.Keyword) > 0 { + orderby = "CASE when lower_name='" + lowerKeyword + "' then 0 when strpos(lower_name,'" + lowerKeyword + "')>0 then 1 else 2 END ASC,lower_name ASC" + } } - - sess := x.Where(cond).OrderBy(opts.OrderBy.String()) + sess := x.Where(cond).OrderBy(orderby) if opts.Page != 0 { sess = opts.setSessionPagination(sess) } diff --git a/models/user_business_analysis.go b/models/user_business_analysis.go index a36bd4736..0c67a569a 100644 --- a/models/user_business_analysis.go +++ b/models/user_business_analysis.go @@ -412,7 +412,16 @@ func QueryUserStaticDataAll(opts *UserBusinessAnalysisQueryOptions) ([]*UserBusi func QueryDataForUserDefineFromDb(opts *UserBusinessAnalysisQueryOptions, key string) ([]*UserBusinessAnalysis, int64) { statictisSess := 
xStatistic.NewSession() defer statictisSess.Close() - cond := "data_date='" + key + "'" + + var cond = builder.NewCond() + cond = cond.And( + builder.Eq{"data_date": key}, + ) + if len(opts.UserName) > 0 { + cond = cond.And( + builder.Like{"name", opts.UserName}, + ) + } allCount, err := statictisSess.Where(cond).Count(new(UserBusinessAnalysis)) if err == nil { if allCount > 0 { diff --git a/modules/auth/wechat/client.go b/modules/auth/wechat/client.go index bb6db09d0..9f7b5b45f 100644 --- a/modules/auth/wechat/client.go +++ b/modules/auth/wechat/client.go @@ -61,11 +61,12 @@ type TemplateValue struct { Color string `json:"color"` } -type CloudbrainTaskData struct { +type DefaultWechatTemplate struct { First TemplateValue `json:"first"` Keyword1 TemplateValue `json:"keyword1"` Keyword2 TemplateValue `json:"keyword2"` Keyword3 TemplateValue `json:"keyword3"` + Keyword4 TemplateValue `json:"keyword4"` Remark TemplateValue `json:"remark"` } diff --git a/modules/auth/wechat/cloudbrain.go b/modules/auth/wechat/cloudbrain.go new file mode 100644 index 000000000..5dbe0a4ea --- /dev/null +++ b/modules/auth/wechat/cloudbrain.go @@ -0,0 +1,164 @@ +package wechat + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + "fmt" + "time" +) + +type JobOperateType string + +const ( + JobOperateTypeStart JobOperateType = "start" + JobOperateTypeStop JobOperateType = "stop" +) + +type CloudbrainStartMsg struct { +} + +func (CloudbrainStartMsg) Data(ctx *TemplateContext) *DefaultWechatTemplate { + return &DefaultWechatTemplate{ + First: TemplateValue{Value: setting.CloudbrainStartedTitle}, + Keyword1: TemplateValue{Value: ctx.Cloudbrain.DisplayJobName}, + Keyword2: TemplateValue{Value: getJobTypeDisplayName(ctx.Cloudbrain.JobType)}, + Keyword3: TemplateValue{Value: time.Unix(int64(ctx.Cloudbrain.CreatedUnix), 0).Format("2006-01-02 15:04:05")}, + Remark: TemplateValue{Value: setting.CloudbrainStartedRemark}, + } +} + 
+func (CloudbrainStartMsg) ShouldSend(ctx *TemplateContext) bool { + if len(setting.CloudbrainStartedNotifyList) == 0 { + return false + } + for _, v := range setting.CloudbrainStartedNotifyList { + if v == ctx.Cloudbrain.JobType { + return true + } + } + return false +} + +func (CloudbrainStartMsg) MsgId(ctx *TemplateContext) string { + return string(JobOperateTypeStart) + "_" + fmt.Sprint(ctx.Cloudbrain.ID) +} + +func (CloudbrainStartMsg) Url(ctx *TemplateContext) string { + repo, err := models.GetRepositoryByID(ctx.Cloudbrain.RepoID) + if err != nil { + log.Error("CloudbrainStartMsg GetRepositoryByID error,%v", err) + return "" + } + return getCloudbrainTemplateUrl(*ctx.Cloudbrain, repo) +} + +func (CloudbrainStartMsg) TemplateId(ctx *TemplateContext) string { + return setting.CloudbrainStartedTemplateId +} + +type CloudbrainStopMsg struct { +} + +func (CloudbrainStopMsg) Data(ctx *TemplateContext) *DefaultWechatTemplate { + return &DefaultWechatTemplate{ + First: TemplateValue{Value: setting.CloudbrainStoppedTitle}, + Keyword1: TemplateValue{Value: ctx.Cloudbrain.DisplayJobName}, + Keyword2: TemplateValue{Value: getJobTypeDisplayName(ctx.Cloudbrain.JobType)}, + Keyword3: TemplateValue{Value: time.Unix(int64(ctx.Cloudbrain.CreatedUnix), 0).Format("2006-01-02 15:04:05")}, + Keyword4: TemplateValue{Value: time.Unix(int64(ctx.Cloudbrain.EndTime), 0).Format("2006-01-02 15:04:05")}, + Remark: TemplateValue{Value: setting.CloudbrainStoppedRemark}, + } +} + +func (CloudbrainStopMsg) ShouldSend(ctx *TemplateContext) bool { + if len(setting.CloudbrainStoppedNotifyList) == 0 { + return false + } + for _, v := range setting.CloudbrainStoppedNotifyList { + if v == ctx.Cloudbrain.JobType { + return true + } + } + return false +} + +func (CloudbrainStopMsg) MsgId(ctx *TemplateContext) string { + return string(JobOperateTypeStop) + "_" + fmt.Sprint(ctx.Cloudbrain.ID) +} + +func (CloudbrainStopMsg) Url(ctx *TemplateContext) string { + repo, err := 
models.GetRepositoryByID(ctx.Cloudbrain.RepoID) + if err != nil { + log.Error("CloudbrainStopMsg GetRepositoryByID error,%v", err) + return "" + } + return getCloudbrainTemplateUrl(*ctx.Cloudbrain, repo) +} + +func (CloudbrainStopMsg) TemplateId(ctx *TemplateContext) string { + return setting.CloudbrainStoppedTemplateId +} + +var startMsg = &CloudbrainStartMsg{} +var stopMsg = &CloudbrainStopMsg{} + +func GetTemplateFromOperateType(operate JobOperateType) Template { + switch operate { + case JobOperateTypeStart: + return startMsg + case JobOperateTypeStop: + return stopMsg + } + return nil +} + +func GetJobOperateTypeFromCloudbrainStatus(cloudbrain *models.Cloudbrain) JobOperateType { + if cloudbrain.IsTerminal() { + return JobOperateTypeStop + } + if cloudbrain.IsRunning() { + return JobOperateTypeStart + } + return "" +} + +func getCloudbrainTemplateUrl(cloudbrain models.Cloudbrain, repo *models.Repository) string { + url := setting.AppURL + repo.FullName() + + switch cloudbrain.JobType { + case string(models.JobTypeDebug): + if cloudbrain.ComputeResource == "CPU/GPU" { + url += "/cloudbrain/" + fmt.Sprint(cloudbrain.ID) + } else { + url += "/modelarts/notebook/" + fmt.Sprint(cloudbrain.ID) + } + case string(models.JobTypeBenchmark): + url += "/cloudbrain/benchmark/" + fmt.Sprint(cloudbrain.ID) + case string(models.JobTypeTrain): + if cloudbrain.Type == models.TypeCloudBrainOne { + url += "/cloudbrain/train-job/" + fmt.Sprint(cloudbrain.JobID) + } else if cloudbrain.Type == models.TypeCloudBrainTwo { + url += "/modelarts/train-job/" + fmt.Sprint(cloudbrain.JobID) + } else if cloudbrain.Type == models.TypeC2Net { + url += "/grampus/train-job/" + fmt.Sprint(cloudbrain.JobID) + } + case string(models.JobTypeInference): + url += "/modelarts/inference-job/" + fmt.Sprint(cloudbrain.JobID) + } + return url +} + +func getJobTypeDisplayName(jobType string) string { + switch jobType { + case string(models.JobTypeDebug): + return "调试任务" + case 
string(models.JobTypeBenchmark): + return "评测任务" + case string(models.JobTypeTrain): + return "训练任务" + case string(models.JobTypeInference): + return "推理任务" + } + return "" +} diff --git a/modules/auth/wechat/template.go b/modules/auth/wechat/template.go index 6c19651a6..2ebd2667d 100644 --- a/modules/auth/wechat/template.go +++ b/modules/auth/wechat/template.go @@ -3,143 +3,61 @@ package wechat import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/setting" "errors" "fmt" - "time" ) -type JobOperateType string - -const ( - JobOperateTypeStart JobOperateType = "start" - JobOperateTypeStop JobOperateType = "stop" -) +type Template interface { + ShouldSend(ctx *TemplateContext) bool + Data(ctx *TemplateContext) *DefaultWechatTemplate + MsgId(ctx *TemplateContext) string + Url(ctx *TemplateContext) string + TemplateId(ctx *TemplateContext) string +} -func GetJobOperateTypeFromCloudbrainStatus(cloudbrain *models.Cloudbrain) JobOperateType { - if cloudbrain.IsTerminal() { - return JobOperateTypeStop - } - if cloudbrain.IsRunning() { - return JobOperateTypeStart - } - return "" +type TemplateContext struct { + Cloudbrain *models.Cloudbrain } -func SendCloudbrainStartedMsg(operateType JobOperateType, cloudbrain models.Cloudbrain) error { +func SendTemplateMsg(template Template, ctx *TemplateContext, userId int64) error { defer func() { if err := recover(); err != nil { combinedErr := fmt.Errorf("%s\n%s", err, log.Stack(2)) log.Error("PANIC:", combinedErr) } }() - repo, err := models.GetRepositoryByID(cloudbrain.RepoID) - if err != nil { - log.Error("SendCloudbrainStartedMsg GetRepositoryByID error,%v", err) - } - - if setting.CloudbrainStartedTemplateId == "" { + if !template.ShouldSend(ctx) { + log.Info("SendTemplateMsg should not Send.jobId=%d jobType=%s", ctx.Cloudbrain.ID, ctx.Cloudbrain.JobType) return nil } - openId := models.GetUserWechatOpenId(cloudbrain.UserID) + openId := models.GetUserWechatOpenId(userId) if 
openId == "" { + log.Error("Wechat openId not exist,userId=%d", userId) return errors.New("Wechat openId not exist") } - data := CloudbrainTaskData{ - First: TemplateValue{Value: getCloudbrainTemplateTitle(operateType)}, - Keyword1: TemplateValue{Value: cloudbrain.DisplayJobName}, - Keyword2: TemplateValue{Value: getJobTypeDisplayName(cloudbrain.JobType)}, - Keyword3: TemplateValue{Value: time.Unix(int64(cloudbrain.CreatedUnix), 0).Format("2006-01-02 15:04:05")}, - Remark: TemplateValue{Value: getCloudbrainTemplateRemark(operateType)}, - } req := TemplateMsgRequest{ ToUser: openId, - TemplateId: setting.CloudbrainStartedTemplateId, - Url: getCloudbrainTemplateUrl(cloudbrain, repo), - ClientMsgId: string(operateType) + "_" + fmt.Sprint(cloudbrain.ID), - Data: data, + TemplateId: template.TemplateId(ctx), + Url: template.Url(ctx), + ClientMsgId: template.MsgId(ctx), + Data: template.Data(ctx), } err, retryFlag := sendTemplateMsg(req) if retryFlag { - log.Info("retrySendCloudbrainTemplateMsg calling") + log.Info("SendTemplateMsg calling") refreshAccessToken() err, _ = sendTemplateMsg(req) if err != nil { - log.Error("SendCloudbrainStartedMsg err. %v", err) + log.Error("SendTemplateMsg err. %v", err) return err } return nil } if err != nil { - log.Error("SendCloudbrainStartedMsg err. %v", err) + log.Error("SendTemplateMsg err. 
%v", err) return err } + log.Info("SendTemplateMsg success") return nil } - -func getCloudbrainTemplateUrl(cloudbrain models.Cloudbrain, repo *models.Repository) string { - url := setting.AppURL + repo.FullName() - - switch cloudbrain.JobType { - case string(models.JobTypeDebug): - if cloudbrain.ComputeResource == "CPU/GPU" { - url += "/cloudbrain/" + fmt.Sprint(cloudbrain.ID) - } else { - url += "/modelarts/notebook/" + fmt.Sprint(cloudbrain.ID) - } - case string(models.JobTypeBenchmark): - url += "/cloudbrain/benchmark/" + fmt.Sprint(cloudbrain.ID) - case string(models.JobTypeTrain): - if cloudbrain.Type == models.TypeCloudBrainOne { - url += "/cloudbrain/train-job/" + fmt.Sprint(cloudbrain.JobID) - } else if cloudbrain.Type == models.TypeCloudBrainTwo { - url += "/modelarts/train-job/" + fmt.Sprint(cloudbrain.JobID) - } else if cloudbrain.Type == models.TypeC2Net { - url += "/grampus/train-job/" + fmt.Sprint(cloudbrain.JobID) - } - case string(models.JobTypeInference): - url += "/modelarts/inference-job/" + fmt.Sprint(cloudbrain.JobID) - } - return url -} - -func getCloudbrainTemplateTitle(operateType JobOperateType) string { - var title string - switch operateType { - case JobOperateTypeStart: - title = "您好,您提交的算力资源申请已通过,任务已启动,请您关注运行情况。" - case JobOperateTypeStop: - title = "您好,您提交的任务已运行结束。" - } - - return title - -} - -func getCloudbrainTemplateRemark(operateType JobOperateType) string { - var remark string - switch operateType { - case JobOperateTypeStart: - remark = "感谢您的耐心等待。" - case JobOperateTypeStop: - remark = "点击可查看运行结果" - } - - return remark - -} - -func getJobTypeDisplayName(jobType string) string { - switch jobType { - case string(models.JobTypeDebug): - return "调试任务" - case string(models.JobTypeBenchmark): - return "评测任务" - case string(models.JobTypeTrain): - return "训练任务" - case string(models.JobTypeInference): - return "推理任务" - } - return "" -} diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 
1872375da..e55d8c887 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -142,8 +142,8 @@ func isAdminOrImageCreater(ctx *context.Context, image *models.Image, err error) func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { - var ID = ctx.Params(":id") - job, err := models.GetCloudbrainByID(ID) + var id = ctx.Params(":id") + job, err := models.GetCloudbrainByID(id) if err != nil { log.Error("GetCloudbrainByID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) @@ -158,8 +158,8 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { func AdminOrJobCreaterRight(ctx *context.Context) { - var ID = ctx.Params(":id") - job, err := models.GetCloudbrainByID(ID) + var id = ctx.Params(":id") + job, err := models.GetCloudbrainByID(id) if err != nil { log.Error("GetCloudbrainByID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) diff --git a/modules/cron/tasks_basic.go b/modules/cron/tasks_basic.go index b3a6c02a1..080f5bd81 100755 --- a/modules/cron/tasks_basic.go +++ b/modules/cron/tasks_basic.go @@ -5,6 +5,7 @@ package cron import ( + "code.gitea.io/gitea/modules/modelarts" "context" "time" @@ -207,6 +208,17 @@ func registerSyncCloudbrainStatus() { }) } +func registerSyncModelArtsTempJobs() { + RegisterTaskFatal("sync_model_arts_temp_jobs", &BaseConfig{ + Enabled: true, + RunAtStart: false, + Schedule: "@every 1m", + }, func(ctx context.Context, _ *models.User, _ Config) error { + modelarts.SyncTempStatusJob() + return nil + }) +} + func initBasicTasks() { registerUpdateMirrorTask() registerRepoHealthCheck() @@ -227,4 +239,5 @@ func initBasicTasks() { registerSyncCloudbrainStatus() registerHandleOrgStatistic() + registerSyncModelArtsTempJobs() } diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index e50301ffe..407339461 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -7,6 +7,7 @@ package markdown import ( 
"bytes" + "strings" "sync" "code.gitea.io/gitea/modules/log" @@ -14,6 +15,8 @@ import ( "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/setting" giteautil "code.gitea.io/gitea/modules/util" + chromahtml "github.com/alecthomas/chroma/formatters/html" + highlighting "github.com/yuin/goldmark-highlighting" "github.com/yuin/goldmark" meta "github.com/yuin/goldmark-meta" @@ -42,16 +45,48 @@ func NewGiteaParseContext(urlPrefix string, isWiki bool) parser.Context { func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte { once.Do(func() { converter = goldmark.New( - goldmark.WithExtensions(extension.Table, + goldmark.WithExtensions( + extension.NewTable( + extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)), extension.Strikethrough, extension.TaskList, extension.DefinitionList, common.FootnoteExtension, - extension.NewTypographer( - extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ - extension.EnDash: nil, - extension.EmDash: nil, - extension.Ellipsis: nil, + highlighting.NewHighlighting( + highlighting.WithFormatOptions( + chromahtml.WithClasses(true), + chromahtml.PreventSurroundingPre(true), + ), + highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) { + if entering { + language, _ := c.Language() + if language == nil { + language = []byte("text") + } + + languageStr := string(language) + + preClasses := []string{"code-block"} + if languageStr == "mermaid" { + preClasses = append(preClasses, "is-loading") + } + + _, err := w.WriteString(`
`)
+ if err != nil {
+ return
+ }
+
+ // include language-x class as part of commonmark spec
+ _, err = w.WriteString(``)
+ if err != nil {
+ return
+ }
+ } else {
+ _, err := w.WriteString("
")
+ if err != nil {
+ return
+ }
+ }
}),
),
meta.Meta,
diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go
index 8dcf1b1a9..9a6ea0574 100755
--- a/modules/modelarts/modelarts.go
+++ b/modules/modelarts/modelarts.go
@@ -6,8 +6,7 @@ import (
"fmt"
"path"
"strconv"
-
- "code.gitea.io/gitea/modules/timeutil"
+ "strings"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/context"
@@ -15,6 +14,7 @@ import (
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
+ "code.gitea.io/gitea/modules/timeutil"
)
const (
@@ -59,7 +59,7 @@ const (
PerPage = 10
IsLatestVersion = "1"
NotLatestVersion = "0"
- VersionCount = 1
+ VersionCountOne = 1
SortByCreateTime = "create_time"
ConfigTypeCustom = "custom"
@@ -284,9 +284,24 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
})
if err != nil {
log.Error("createNotebook2 failed: %v", err.Error())
+ if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
+ log.Info("(%s)unknown error, set temp status", displayJobName)
+ errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+ JobID: models.TempJobId,
+ VersionID: models.TempVersionId,
+ Status: models.TempJobStatus,
+ Type: models.TypeCloudBrainTwo,
+ JobName: jobName,
+ JobType: string(models.JobTypeDebug),
+ })
+ if errTemp != nil {
+ log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+ return errTemp
+ }
+ }
return err
}
- err = models.CreateCloudbrain(&models.Cloudbrain{
+ task := &models.Cloudbrain{
Status: jobResult.Status,
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
@@ -302,16 +317,13 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
Description: description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
- })
-
- if err != nil {
- return err
}
- task, err := models.GetCloudbrainByName(jobName)
+
+ err = models.CreateCloudbrain(task)
if err != nil {
- log.Error("GetCloudbrainByName failed: %v", err.Error())
return err
}
+
stringId := strconv.FormatInt(task.ID, 10)
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask)
return nil
@@ -364,7 +376,22 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
})
}
if createErr != nil {
- log.Error("CreateJob failed: %v", createErr.Error())
+ log.Error("createTrainJob failed: %v", createErr.Error())
+ if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) {
+ log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
+ errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+ JobID: models.TempJobId,
+ VersionID: models.TempVersionId,
+ Status: models.TempJobStatus,
+ Type: models.TypeCloudBrainTwo,
+ JobName: req.JobName,
+ JobType: string(models.JobTypeTrain),
+ })
+ if errTemp != nil {
+ log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+ return errTemp
+ }
+ }
return createErr
}
jobId := strconv.FormatInt(jobResult.JobID, 10)
@@ -438,7 +465,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
createTime := timeutil.TimeStampNow()
var jobResult *models.CreateTrainJobResult
var createErr error
- log.Info(" req.EngineID =" + fmt.Sprint(req.EngineID))
+
if req.EngineID < 0 {
jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{
Description: req.Description,
@@ -480,7 +507,22 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
}, jobId)
}
if createErr != nil {
- log.Error("CreateJob failed: %v", createErr.Error())
+ log.Error("createTrainJobVersion failed: %v", createErr.Error())
+ if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) {
+ log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
+ errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+ JobID: jobId,
+ VersionID: models.TempVersionId,
+ Status: models.TempJobStatus,
+ Type: models.TypeCloudBrainTwo,
+ JobName: req.JobName,
+ JobType: string(models.JobTypeTrain),
+ })
+ if errTemp != nil {
+ log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+ return errTemp
+ }
+ }
return createErr
}
@@ -540,7 +582,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
}
//将训练任务的上一版本的isLatestVersion设置为"0"
- createErr = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
+ createErr = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCountOne, NotLatestVersion, TotalVersionCount)
if createErr != nil {
ctx.ServerError("Update IsLatestVersion failed", createErr)
return createErr
@@ -549,99 +591,6 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
return createErr
}
-func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
- createTime := timeutil.TimeStampNow()
- jobResult, err := createTrainJobUserImage(models.CreateUserImageTrainJobParams{
- JobName: req.JobName,
- Description: req.Description,
- Config: models.UserImageConfig{
- WorkServerNum: req.WorkServerNumber,
- AppUrl: req.CodeObsPath,
- BootFileUrl: req.BootFileUrl,
- DataUrl: req.DataUrl,
- TrainUrl: req.TrainUrl,
- LogUrl: req.LogUrl,
- PoolID: req.PoolID,
- CreateVersion: true,
- Flavor: models.Flavor{
- Code: req.FlavorCode,
- },
- Parameter: req.Parameters,
- UserImageUrl: req.UserImageUrl,
- UserCommand: req.UserCommand,
- },
- })
- if err != nil {
- log.Error("CreateJob failed: %v", err.Error())
- return err
- }
-
- var jobTypes []string
- jobTypes = append(jobTypes, string(models.JobTypeTrain))
- repo := ctx.Repo.Repository
- VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
- RepoID: repo.ID,
- Type: models.TypeCloudBrainTwo,
- JobTypes: jobTypes,
- JobID: strconv.FormatInt(jobResult.JobID, 10),
- })
- if err != nil {
- ctx.ServerError("Cloudbrain", err)
- return err
- }
- //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount
-
- err = models.CreateCloudbrain(&models.Cloudbrain{
- Status: TransTrainJobStatus(jobResult.Status),
- UserID: ctx.User.ID,
- RepoID: ctx.Repo.Repository.ID,
- JobID: strconv.FormatInt(jobResult.JobID, 10),
- JobName: req.JobName,
- DisplayJobName: req.DisplayJobName,
- JobType: string(models.JobTypeTrain),
- Type: models.TypeCloudBrainTwo,
- VersionID: jobResult.VersionID,
- VersionName: jobResult.VersionName,
- Uuid: req.Uuid,
- DatasetName: req.DatasetName,
- CommitID: req.CommitID,
- IsLatestVersion: req.IsLatestVersion,
- PreVersionName: req.PreVersionName,
- ComputeResource: models.NPUResource,
- EngineID: MORDELART_USER_IMAGE_ENGINE_ID,
- Image: req.UserImageUrl,
- TrainUrl: req.TrainUrl,
- BranchName: req.BranchName,
- Parameters: req.Params,
- BootFile: req.BootFile,
- DataUrl: req.DataUrl,
- LogUrl: req.LogUrl,
- PreVersionId: req.PreVersionId,
- FlavorCode: req.FlavorCode,
- Description: req.Description,
- WorkServerNumber: req.WorkServerNumber,
- FlavorName: req.FlavorName,
- EngineName: req.EngineName,
- TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1,
- VersionCount: VersionListCount + 1,
- CreatedUnix: createTime,
- UpdatedUnix: createTime,
- })
- if err != nil {
- log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
- return err
- }
-
- //将训练任务的上一版本的isLatestVersion设置为"0"
- err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
- if err != nil {
- ctx.ServerError("Update IsLatestVersion failed", err)
- return err
- }
-
- return err
-}
-
func TransTrainJobStatus(status int) string {
switch status {
case 0:
@@ -722,7 +671,22 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
},
})
if err != nil {
- log.Error("CreateJob failed: %v", err.Error())
+ log.Error("createInferenceJob failed: %v", err.Error())
+		if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
+			log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
+			// Keep err intact: a successful insert must not turn the create failure into a nil return.
+			if errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+				JobID:     models.TempJobId,
+				VersionID: models.TempVersionId,
+				Status:    models.TempJobStatus,
+				Type:      models.TypeCloudBrainTwo,
+				JobName:   req.JobName,
+				JobType:   string(models.JobTypeInference),
+			}); errTemp != nil {
+				log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+				return errTemp
+			}
+		}
return err
}
@@ -807,3 +771,455 @@ func InitSpecialPool() {
json.Unmarshal([]byte(setting.ModelArtsSpecialPools), &SpecialPools)
}
}
+
+// HandleTrainJobInfo refreshes task from the ModelArts train-job query
+// (GetTrainJob) and saves it with models.UpdateJob, sending a status-change
+// notification when the status moved. It returns the query or update error.
+func HandleTrainJobInfo(task *models.Cloudbrain) error {
+	result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
+	if err != nil {
+		log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err)
+		return err
+	}
+	if result == nil {
+		return nil
+	}
+
+	oldStatus := task.Status
+	task.Status = TransTrainJobStatus(result.IntStatus)
+	task.Duration = result.Duration / 1000
+	// TrainJobDuration is always derived from Duration, so it is set exactly once.
+	task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
+	if task.StartTime == 0 && result.StartTime > 0 {
+		task.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
+	}
+	if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
+		task.EndTime = task.StartTime.Add(task.Duration)
+	}
+	task.CorrectCreateUnix()
+	if oldStatus != task.Status {
+		notification.NotifyChangeCloudbrainStatus(task, oldStatus)
+	}
+	if err = models.UpdateJob(task); err != nil {
+		log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
+		return err
+	}
+	return nil
+}
+
+// HandleNotebookInfo refreshes task from GetNotebook2 and stores it through
+// models.UpdateJob; a status change triggers NotifyChangeCloudbrainStatus.
+func HandleNotebookInfo(task *models.Cloudbrain) error {
+	notebook, err := GetNotebook2(task.JobID)
+	if err != nil {
+		log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
+		return err
+	}
+	if notebook == nil {
+		return nil
+	}
+
+	previous := task.Status
+	task.Status = notebook.Status
+	if task.StartTime == 0 && notebook.Lease.UpdateTime > 0 {
+		task.StartTime = timeutil.TimeStamp(notebook.Lease.UpdateTime / 1000)
+	}
+	if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
+		task.EndTime = timeutil.TimeStampNow()
+	}
+	task.CorrectCreateUnix()
+	task.ComputeAndSetDuration()
+	if task.Status != previous {
+		notification.NotifyChangeCloudbrainStatus(task, previous)
+	}
+	if task.FlavorCode == "" {
+		task.FlavorCode = notebook.Flavor
+	}
+	if err = models.UpdateJob(task); err != nil {
+		log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err)
+		return err
+	}
+	return nil
+}
+
+// SyncTempStatusJob walks every record returned by
+// models.GetCloudBrainTempJobs and advances the TypeCloudBrainTwo ones by one
+// step through handleNotebook, handleTrainJob or handleTrainJobMultiVersion.
+// A failure on one record is logged and the remaining records are still
+// processed, so a single bad record cannot stall the whole run.
+func SyncTempStatusJob() {
+	jobs, err := models.GetCloudBrainTempJobs()
+	if err != nil {
+		log.Error("GetCloudBrainTempJobs failed:%v", err.Error())
+		return
+	}
+
+	for _, temp := range jobs {
+		log.Info("start to handle record: %s", temp.JobName)
+		// Records of any other platform type are not handled here.
+		if temp.Type != models.TypeCloudBrainTwo {
+			continue
+		}
+
+		switch temp.JobType {
+		case string(models.JobTypeDebug):
+			if err = handleNotebook(temp); err != nil {
+				log.Error("handleNotebook failed:%v", err)
+			}
+		case string(models.JobTypeTrain), string(models.JobTypeInference):
+			// A record whose job ID has no stored task is handled as a single
+			// (first) version, any other as an additional version of a known job.
+			// NOTE(review): every lookup error, not only "not found", takes the
+			// single-version path; confirm that is acceptable.
+			if _, err = models.GetCloudbrainByJobID(temp.JobID); err != nil {
+				if err = handleTrainJob(temp); err != nil {
+					log.Error("handleTrainJob failed:%v", err)
+				}
+			} else if err = handleTrainJobMultiVersion(temp); err != nil {
+				log.Error("handleTrainJobMultiVersion failed:%v", err)
+			}
+		}
+	}
+}
+
+// handleNotebook advances one temporary debug-notebook record by one step: a
+// record in models.TempJobStatus is resolved via handleTempNotebook, and one in
+// ModelArtsStopping is deleted and marked ModelArtsDeleted once it has stopped.
+func handleNotebook(temp *models.CloudbrainTemp) error {
+	switch temp.Status {
+	case models.TempJobStatus:
+		if err := handleTempNotebook(temp); err != nil {
+			log.Error("handleTempNotebook failed:%v", err)
+			return err
+		}
+	case string(models.ModelArtsStopping):
+		res, err := GetNotebook2(temp.JobID)
+		if err != nil {
+			log.Error("GetNotebook2 failed:%v", err)
+			return err
+		}
+		// A nil reply is tolerated (as in HandleNotebookInfo) instead of dereferenced.
+		if res != nil {
+			temp.Status = res.Status
+		}
+		if temp.Status != string(models.ModelArtsStopped) {
+			return nil
+		}
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp failed:%v", err)
+			return err
+		}
+		if _, err = DelNotebook2(temp.JobID); err != nil {
+			log.Error("DelNotebook2 failed:%v", err)
+			return err
+		}
+		temp.Status = string(models.ModelArtsDeleted)
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp failed:%v", err)
+			return err
+		}
+	}
+	return nil
+}
+
+// handleTempNotebook resolves a debug-notebook record that is still in
+// models.TempJobStatus. It looks the notebook up by name; a notebook whose
+// creation failed is deleted, any other is asked to stop, and the resulting
+// status is stored on the record.
+//
+// When no notebook was found and temp.QueryTimes has reached
+// setting.MaxTempQueryTimes the record is marked failed and the result of that
+// update is returned. Otherwise the first lookup, stop, delete or update error
+// is returned, with errors.New("not found") standing for "no match yet".
+func handleTempNotebook(temp *models.CloudbrainTemp) error {
+	found, err := resolveTempNotebook(temp)
+	if found || temp.QueryTimes < setting.MaxTempQueryTimes {
+		return err
+	}
+
+	log.Info("reach MaxTempQueryTimes, set the job failed")
+	temp.Status = string(models.ModelArtsTrainJobFailed)
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+	}
+	return err
+}
+
+// resolveTempNotebook performs the lookup and stop/delete step of
+// handleTempNotebook. It reports whether a matching notebook was found and the
+// first error encountered; every failure is also logged here. temp is updated
+// in place (QueryTimes, JobID, Status).
+func resolveTempNotebook(temp *models.CloudbrainTemp) (bool, error) {
+	// presumably limit=1000, offset=0, newest first; TODO confirm against GetNotebookList
+	result, err := GetNotebookList(1000, 0, "createTime", "DESC", temp.JobName)
+	if err != nil {
+		log.Error("GetNotebookList failed:%v", err)
+		return false, err
+	}
+
+	// Count the attempt; failing to persist the counter is logged but not fatal.
+	temp.QueryTimes++
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp failed:%v", err)
+	}
+
+	found := false
+	if result != nil {
+		// A record that already carries a real job ID (restart case) only matches
+		// an entry that is not stopped; a new record matches purely by name.
+		isNew := temp.JobID == models.TempJobId
+		for _, notebook := range result.NotebookList {
+			if notebook.JobName != temp.JobName {
+				continue
+			}
+			if !isNew && notebook.Status == string(models.ModelArtsStopped) {
+				continue
+			}
+			found = true
+			temp.Status = notebook.Status
+			temp.JobID = notebook.JobID
+			break
+		}
+	}
+	if !found {
+		log.Error("can not find the record(%s) till now", temp.JobName)
+		return false, errors.New("not found")
+	}
+
+	log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)
+	if temp.Status == string(models.ModelArtsCreateFailed) {
+		// Store the discovered job ID and status before removing the notebook.
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp failed:%v", err)
+			return true, err
+		}
+		if _, err = DelNotebook2(temp.JobID); err != nil {
+			log.Error("DelNotebook2(%s) failed:%v", temp.JobName, err)
+			return true, err
+		}
+		temp.Status = string(models.ModelArtsDeleted)
+	} else {
+		action := models.NotebookAction{Action: models.ActionStop}
+		if _, err = ManageNotebook2(temp.JobID, action); err != nil {
+			log.Error("ManageNotebook2(%s) failed:%v", temp.JobName, err)
+			return true, err
+		}
+		temp.Status = string(models.ModelArtsStopping)
+	}
+
+	// Report a failed write; otherwise the stored record would silently lag
+	// behind the in-memory status.
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+		return true, err
+	}
+	return true, nil
+}
+
+// handleTrainJob advances one temporary single-version train-job record: a
+// record in models.TempJobStatus is resolved via handleTempTrainJob, and one in
+// ModelArtsTrainJobKilling is deleted and marked ModelArtsDeleted once killed.
+func handleTrainJob(temp *models.CloudbrainTemp) error {
+	switch temp.Status {
+	case models.TempJobStatus:
+		if err := handleTempTrainJob(temp); err != nil {
+			log.Error("handleTempTrainJob failed:%v", err)
+			return err
+		}
+	case string(models.ModelArtsTrainJobKilling):
+		res, err := GetTrainJob(temp.JobID, temp.VersionID)
+		if err != nil {
+			log.Error("GetTrainJob failed:%v", err)
+			return err
+		}
+		// A nil reply is tolerated (as in HandleTrainJobInfo) instead of dereferenced.
+		if res != nil {
+			temp.Status = TransTrainJobStatus(res.IntStatus)
+		}
+		if temp.Status != string(models.ModelArtsTrainJobKilled) {
+			return nil
+		}
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp failed:%v", err)
+			return err
+		}
+		if _, err = DelTrainJob(temp.JobID); err != nil {
+			log.Error("DelTrainJob failed:%v", err)
+			return err
+		}
+		temp.Status = string(models.ModelArtsDeleted)
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp failed:%v", err)
+			return err
+		}
+	}
+	return nil
+}
+
+// handleTrainJobMultiVersion advances one temporary record that stands for an
+// additional version of a known train job: a record in models.TempJobStatus is
+// resolved via handleTempTrainJobMultiVersion, and one in
+// ModelArtsTrainJobKilling has its version deleted once it reports killed.
+func handleTrainJobMultiVersion(temp *models.CloudbrainTemp) error {
+	switch temp.Status {
+	case models.TempJobStatus:
+		if err := handleTempTrainJobMultiVersion(temp); err != nil {
+			log.Error("handleTempTrainJobMultiVersion failed:%v", err)
+			return err
+		}
+	case string(models.ModelArtsTrainJobKilling):
+		res, err := GetTrainJob(temp.JobID, temp.VersionID)
+		if err != nil {
+			log.Error("GetTrainJob failed:%v", err)
+			return err
+		}
+		// A nil reply is tolerated (as in HandleTrainJobInfo) instead of dereferenced.
+		if res != nil {
+			temp.Status = TransTrainJobStatus(res.IntStatus)
+		}
+		if temp.Status != string(models.ModelArtsTrainJobKilled) {
+			return nil
+		}
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp failed:%v", err)
+			return err
+		}
+		if _, err = DelTrainJobVersion(temp.JobID, temp.VersionID); err != nil {
+			log.Error("DelTrainJobVersion failed:%v", err)
+			return err
+		}
+		temp.Status = string(models.ModelArtsDeleted)
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp failed:%v", err)
+			return err
+		}
+	}
+	return nil
+}
+
+// handleTempTrainJobMultiVersion resolves a record in models.TempJobStatus for
+// a new version of an existing train job: the version is located, stopped and
+// the record moved to ModelArtsTrainJobKilling. If nothing is found once
+// setting.MaxTempQueryTimes is reached, the record is marked failed instead.
+func handleTempTrainJobMultiVersion(temp *models.CloudbrainTemp) error {
+	found, err := resolveTempTrainJobVersion(temp)
+	if found || temp.QueryTimes < setting.MaxTempQueryTimes {
+		return err
+	}
+
+	log.Info("reach MaxTempQueryTimes, set the job failed")
+	temp.Status = string(models.ModelArtsTrainJobFailed)
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+	}
+	return err
+}
+
+// resolveTempTrainJobVersion performs the lookup and stop step of
+// handleTempTrainJobMultiVersion and reports whether the version was found.
+func resolveTempTrainJobVersion(temp *models.CloudbrainTemp) (bool, error) {
+	result, err := GetTrainJobVersionList(1000, 1, temp.JobID)
+	if err != nil {
+		log.Error("GetTrainJobVersionList failed:%v", err)
+		return false, err
+	}
+
+	temp.QueryTimes++
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp failed:%v", err)
+	}
+	if result == nil {
+		return false, nil
+	}
+
+	// A failed count must not be read as zero, or a recorded version could be matched and stopped.
+	count, err := models.GetCloudbrainCountByJobName(temp.JobName, temp.JobType, temp.Type)
+	if err != nil {
+		log.Error("GetCloudbrainCountByJobName(%s) failed:%v", temp.JobName, err)
+		return false, err
+	}
+	if result.VersionCount != int64(count+1) || len(result.JobVersionList) == 0 {
+		log.Error("can not find the record(%s) till now", temp.JobName)
+		return false, errors.New("not found")
+	}
+
+	temp.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus)
+	temp.VersionID = strconv.FormatInt(result.JobVersionList[0].VersionID, 10)
+	log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)
+	if _, err = StopTrainJob(temp.JobID, temp.VersionID); err != nil {
+		log.Error("StopTrainJob failed:%v", err)
+		return true, err
+	}
+	temp.Status = string(models.ModelArtsTrainJobKilling)
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+		return true, err
+	}
+	return true, nil
+}
+
+// handleTempTrainJob resolves a record in models.TempJobStatus that stands for
+// a brand-new train job. The job is looked up by name; the first listed job
+// with that name that has not failed is stopped and the record moves to
+// ModelArtsTrainJobKilling. When nothing was found and temp.QueryTimes has
+// reached setting.MaxTempQueryTimes the record is marked failed and the result
+// of that update is returned; otherwise the first error encountered is
+// returned, with errors.New("not found") standing for "no match yet".
+func handleTempTrainJob(temp *models.CloudbrainTemp) error {
+	found, err := resolveTempTrainJob(temp)
+	if found || temp.QueryTimes < setting.MaxTempQueryTimes {
+		return err
+	}
+
+	log.Info("reach MaxTempQueryTimes, set the job failed")
+	temp.Status = string(models.ModelArtsTrainJobFailed)
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+	}
+	return err
+}
+
+// resolveTempTrainJob performs the lookup and stop step of handleTempTrainJob
+// and reports whether a matching job was found; temp is updated in place.
+func resolveTempTrainJob(temp *models.CloudbrainTemp) (bool, error) {
+	result, err := GetTrainJobList(1000, 1, "create_time", "desc", temp.JobName)
+	if err != nil {
+		log.Error("GetTrainJobList failed:%v", err)
+		return false, err
+	}
+
+	temp.QueryTimes++
+	if err = models.UpdateCloudbrainTemp(temp); err != nil {
+		log.Error("UpdateCloudbrainTemp failed:%v", err)
+	}
+	if result == nil {
+		return false, nil
+	}
+
+	for _, job := range result.JobList {
+		status := TransTrainJobStatus(job.IntStatus)
+		if job.JobName != temp.JobName || status == string(models.ModelArtsTrainJobFailed) {
+			continue
+		}
+
+		// The record can track a single job, so only the first match is stopped
+		// and the scan ends here whatever the outcome.
+		temp.Status = status
+		temp.JobID = strconv.FormatInt(job.JobID, 10)
+		temp.VersionID = strconv.FormatInt(job.VersionID, 10)
+		log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)
+		if _, err = StopTrainJob(temp.JobID, temp.VersionID); err != nil {
+			log.Error("StopTrainJob(%s) failed:%v", temp.JobName, err)
+			return true, err
+		}
+		temp.Status = string(models.ModelArtsTrainJobKilling)
+		if err = models.UpdateCloudbrainTemp(temp); err != nil {
+			log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+			return true, err
+		}
+		return true, nil
+	}
+
+	log.Error("can not find the record(%s) till now", temp.JobName)
+	return false, errors.New("not found")
+}
diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go
index 46c273a8b..fd1c467f3 100755
--- a/modules/modelarts/resty.go
+++ b/modules/modelarts/resty.go
@@ -37,6 +37,7 @@ const (
NotebookNotFound = "ModelArts.6404"
NotebookNoPermission = "ModelArts.6407"
NotebookInvalid = "ModelArts.6400"
+ UnknownErrorPrefix = "UNKNOWN:"
)
func getRestyClient() *resty.Client {
@@ -298,6 +299,10 @@ sendjob:
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
+ }
+
if len(response.ErrorCode) != 0 {
log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if response.ErrorCode == modelartsIllegalToken && retry < 1 {
@@ -506,23 +511,27 @@ sendjob:
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
}
- log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
- BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
- DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
- if temp.ErrorMsg == BootFileErrorMsg {
- log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ log.Error("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
+ dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
+ if temp.ErrorMsg == bootFileErrorMsg {
+ log.Error("启动文件错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("启动文件错误!")
}
- if temp.ErrorMsg == DataSetErrorMsg {
- log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ if temp.ErrorMsg == dataSetErrorMsg {
+ log.Error("数据集错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
- return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
}
if !result.IsSuccess {
- log.Error("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
- return &result, fmt.Errorf("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ log.Error("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ return &result, fmt.Errorf("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}
return &result, nil
@@ -547,9 +556,6 @@ sendjob:
return nil, fmt.Errorf("resty create train-job: %s", err)
}
- req, _ := json.Marshal(createJobParams)
- log.Info("%s", req)
-
if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
@@ -563,17 +569,21 @@ sendjob:
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
}
log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
- BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
- DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
- if temp.ErrorMsg == BootFileErrorMsg {
+ bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
+ dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
+ if temp.ErrorMsg == bootFileErrorMsg {
log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("启动文件错误!")
}
- if temp.ErrorMsg == DataSetErrorMsg {
+ if temp.ErrorMsg == dataSetErrorMsg {
log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
- return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
}
if !result.IsSuccess {
@@ -603,9 +613,6 @@ sendjob:
return nil, fmt.Errorf("resty create train-job version: %s", err)
}
- req, _ := json.Marshal(createJobVersionParams)
- log.Info("%s", req)
-
if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
@@ -618,17 +625,23 @@ sendjob:
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
}
- BootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
- DataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
- if temp.ErrorMsg == BootFileErrorMsg {
+
+ log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ bootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
+ dataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
+ if temp.ErrorMsg == bootFileErrorMsg {
log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("启动文件错误!")
}
- if temp.ErrorMsg == DataSetErrorMsg {
+ if temp.ErrorMsg == dataSetErrorMsg {
log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
- return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
}
if !result.IsSuccess {
@@ -761,9 +774,6 @@ sendjob:
goto sendjob
}
- //temp, _ := json.Marshal(req)
- //log.Info("%s", temp)
-
if res.StatusCode() != http.StatusOK {
var temp models.ErrorResult
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
@@ -1172,7 +1182,11 @@ sendjob:
log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
- return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
}
if !result.IsSuccess {
@@ -1212,7 +1226,11 @@ sendjob:
err = json.Unmarshal(res.Body(), &response)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
- return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
+ return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
+ }
+
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
}
if len(response.ErrorCode) != 0 {
@@ -1271,3 +1289,139 @@ sendjob:
return &result, nil
}
+
+// GetTrainJobList fetches one page of train jobs from urlTrainJob, passing sortBy, order and
+// searchContent as query parameters; a 401 response triggers one getToken call and a single retry.
+// A request error yields a nil result; a non-200 status or !IsSuccess yields &result plus an error.
+func GetTrainJobList(perPage, page int, sortBy, order, searchContent string) (*models.GetTrainJobListResult, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GetTrainJobListResult
+	retry := 0
+
+sendjob:
+	res, err := client.R().
+		SetQueryParams(map[string]string{
+			"per_page":       strconv.Itoa(perPage),
+			"page":           strconv.Itoa(page),
+			"sortBy":         sortBy,
+			"order":          order,
+			"search_content": searchContent,
+		}).
+		SetAuthToken(TOKEN).
+		SetResult(&result).
+		Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
+	if err != nil {
+		return nil, fmt.Errorf("resty GetTrainJobList: %v", err)
+	}
+
+	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+		retry++
+		_ = getToken()
+		goto sendjob
+	}
+
+	if res.StatusCode() != http.StatusOK {
+		var temp models.ErrorResult
+		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+		}
+		log.Error("GetTrainJobList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+		return &result, fmt.Errorf("%s", temp.ErrorMsg) // ErrorMsg is response text, never a format string
+	}
+
+	if !result.IsSuccess {
+		log.Error("GetTrainJobList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+		return &result, fmt.Errorf("%s", result.ErrorMsg) // same: a '%' in the message must stay literal
+	}
+	return &result, nil
+}
+
+// GetTrainJobVersionList fetches one page (page, perPage) of versions of train job jobID from
+// urlTrainJob/{jobID}/versions; a 401 response triggers one getToken call and a single retry.
+// A request error yields a nil result; a non-200 status or !IsSuccess yields &result plus an error.
+func GetTrainJobVersionList(perPage, page int, jobID string) (*models.GetTrainJobVersionListResult, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GetTrainJobVersionListResult
+	retry := 0
+
+sendjob:
+	res, err := client.R().
+		SetQueryParams(map[string]string{
+			"per_page": strconv.Itoa(perPage),
+			"page":     strconv.Itoa(page),
+		}).
+		SetAuthToken(TOKEN).
+		SetResult(&result).
+		Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")
+	if err != nil {
+		return nil, fmt.Errorf("resty GetTrainJobVersionList: %v", err)
+	}
+
+	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+		retry++
+		_ = getToken()
+		goto sendjob
+	}
+
+	if res.StatusCode() != http.StatusOK {
+		var temp models.ErrorResult
+		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+		}
+		log.Error("GetTrainJobVersionList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+		return &result, fmt.Errorf("%s", temp.ErrorMsg) // ErrorMsg is response text, never a format string
+	}
+
+	if !result.IsSuccess {
+		log.Error("GetTrainJobVersionList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+		return &result, fmt.Errorf("%s", result.ErrorMsg) // same: a '%' in the message must stay literal
+	}
+	return &result, nil
+}
+
+// GetNotebookList fetches notebooks from urlNotebook2, sending limit, offset, searchContent (as
+// "name"), sortBy ("sort_key") and order ("sort_dir") as query parameters; a 401 response triggers
+// one getToken call and a single retry. A request error yields nil; a non-200 status yields &result plus an error.
+func GetNotebookList(limit, offset int, sortBy, order, searchContent string) (*models.GetNotebookListResult, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GetNotebookListResult
+	retry := 0
+
+sendjob:
+	res, err := client.R().
+		SetQueryParams(map[string]string{
+			"limit":    strconv.Itoa(limit),
+			"offset":   strconv.Itoa(offset),
+			"name":     searchContent,
+			"sort_key": sortBy,
+			"sort_dir": order,
+		}).
+		SetAuthToken(TOKEN).
+		SetResult(&result).
+		Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2)
+	if err != nil {
+		return nil, fmt.Errorf("resty GetNotebookList: %v", err)
+	}
+
+	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+		retry++
+		_ = getToken()
+		goto sendjob
+	}
+
+	if res.StatusCode() != http.StatusOK {
+		var temp models.ErrorResult
+		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+		}
+		log.Error("GetNotebookList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+		return &result, fmt.Errorf("%s", temp.ErrorMsg) // ErrorMsg is response text, never a format string
+	}
+	return &result, nil
+}
diff --git a/modules/notification/wechat/wechat.go b/modules/notification/wechat/wechat.go
index f77bfe741..cd72bb54e 100644
--- a/modules/notification/wechat/wechat.go
+++ b/modules/notification/wechat/wechat.go
@@ -7,8 +7,8 @@ package wechat
import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth/wechat"
+ "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification/base"
- "code.gitea.io/gitea/modules/setting"
)
type wechatNotifier struct {
@@ -25,20 +25,12 @@ func NewNotifier() base.Notifier {
}
func (*wechatNotifier) NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) {
+ log.Info("NotifyChangeCloudbrainStatus cloudbrain.id=%d cloudbrain.status=%s oldStatus=%s", cloudbrain.ID, cloudbrain.Status, oldStatus)
operateType := wechat.GetJobOperateTypeFromCloudbrainStatus(cloudbrain)
if operateType == "" {
+ log.Info("NotifyChangeCloudbrainStatus operateType is incorrect") // empty operateType: nothing is sent for this status
return
}
- switch operateType {
- case wechat.JobOperateTypeStart:
- if len(setting.CloudbrainStartedNotifyList) == 0 {
- return
- }
- for _, v := range setting.CloudbrainStartedNotifyList {
- if v == cloudbrain.JobType {
- go wechat.SendCloudbrainStartedMsg(operateType, *cloudbrain)
- return
- }
- }
- }
+ template := wechat.GetTemplateFromOperateType(operateType) // template lookup is keyed by operateType
+ go wechat.SendTemplateMsg(template, &wechat.TemplateContext{Cloudbrain: cloudbrain}, cloudbrain.UserID) // started in its own goroutine; this method returns without waiting for it
}
diff --git a/modules/setting/setting.go b/modules/setting/setting.go
index 8a8a4a052..1e96ff9da 100755
--- a/modules/setting/setting.go
+++ b/modules/setting/setting.go
@@ -539,6 +539,7 @@ var (
DebugHost string
ImageInfos string
Capacity int
+ MaxTempQueryTimes int
//train-job
ResourcePools string
Engines string
@@ -586,6 +587,12 @@ var (
//wechat template msg config
CloudbrainStartedTemplateId string
CloudbrainStartedNotifyList []string
+ CloudbrainStartedTitle string
+ CloudbrainStartedRemark string
+ CloudbrainStoppedTemplateId string
+ CloudbrainStoppedNotifyList []string
+ CloudbrainStoppedTitle string
+ CloudbrainStoppedRemark string
//nginx proxy
PROXYURL string
@@ -1418,6 +1425,7 @@ func NewContext() {
Flavor = sec.Key("FLAVOR").MustString("")
ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
Capacity = sec.Key("IMAGE_INFOS").MustInt(100)
+ MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30)
ResourcePools = sec.Key("Resource_Pools").MustString("")
Engines = sec.Key("Engines").MustString("")
EngineVersions = sec.Key("Engine_Versions").MustString("")
@@ -1448,6 +1456,12 @@ func NewContext() {
TreePathOfSubscribe = sec.Key("SUBSCRIBE_TREE_PATH").MustString("wechat/subscribe_reply.json")
CloudbrainStartedTemplateId = sec.Key("CLOUDBRAIN_STARTED_TEMPLATE_ID").MustString("")
CloudbrainStartedNotifyList = strings.Split(sec.Key("CLOUDBRAIN_STARTED_NOTIFY_LIST").MustString("DEBUG"), ",")
+ CloudbrainStartedTitle = sec.Key("CLOUDBRAIN_STARTED_TITLE").MustString("您好,您提交的算力资源申请已通过,任务已启动,请您关注运行情况。")
+ CloudbrainStartedRemark = sec.Key("CLOUDBRAIN_STARTED_REMARK").MustString("感谢您的耐心等待。")
+ CloudbrainStoppedTemplateId = sec.Key("CLOUDBRAIN_STOPPED_TEMPLATE_ID").MustString("")
+ CloudbrainStoppedNotifyList = strings.Split(sec.Key("CLOUDBRAIN_STOPPED_NOTIFY_LIST").MustString("TRAIN"), ",")
+ CloudbrainStoppedTitle = sec.Key("CLOUDBRAIN_STOPPED_TITLE").MustString("您好,您申请的算力资源已结束使用,任务已完成运行,请您关注运行结果。")
+ CloudbrainStoppedRemark = sec.Key("CLOUDBRAIN_STOPPED_REMARK").MustString("感谢您的耐心等待。")
SetRadarMapConfig()
diff --git a/modules/storage/minio_ext.go b/modules/storage/minio_ext.go
index 4c0cbac55..4b738c068 100755
--- a/modules/storage/minio_ext.go
+++ b/modules/storage/minio_ext.go
@@ -179,28 +179,39 @@ func GetOneLevelAllObjectUnderDirMinio(bucket string, prefixRootPath string, rel
output, err := core.ListObjects(bucket, Prefix, "", "", 1000)
fileInfos := make([]FileInfo, 0)
prefixLen := len(Prefix)
+ fileMap := make(map[string]bool, 0)
if err == nil {
for _, val := range output.Contents {
+
log.Info("val key=" + val.Key)
var isDir bool
var fileName string
if val.Key == Prefix {
continue
}
- if strings.HasSuffix(val.Key, "/") {
+ fileName = val.Key[prefixLen:]
+ log.Info("fileName =" + fileName)
+ files := strings.Split(fileName, "/")
+ if fileMap[files[0]] {
+ continue
+ } else {
+ fileMap[files[0]] = true
+ }
+ ParenDir := relativePath
+ fileName = files[0]
+ if len(files) > 1 {
isDir = true
- fileName = val.Key[prefixLen : len(val.Key)-1]
- relativePath += val.Key[prefixLen:]
+ ParenDir += fileName + "/"
} else {
isDir = false
- fileName = val.Key[prefixLen:]
}
+
fileInfo := FileInfo{
ModTime: val.LastModified.Local().Format("2006-01-02 15:04:05"),
FileName: fileName,
Size: val.Size,
IsDir: isDir,
- ParenDir: relativePath,
+ ParenDir: ParenDir,
}
fileInfos = append(fileInfos, fileInfo)
}
diff --git a/modules/storage/obs.go b/modules/storage/obs.go
index 29b7998f7..2cb3af927 100755
--- a/modules/storage/obs.go
+++ b/modules/storage/obs.go
@@ -395,29 +395,6 @@ func GetOneLevelAllObjectUnderDir(bucket string, prefixRootPath string, relative
} else {
isDir = false
}
-
- // if strings.Contains(val.Key[prefixLen:len(val.Key)-1], "/") {
-
- // files := strings.Split(fileName, "/")
- // fileName = files[0]
- // isDir = true
- // if fileMap[files[0]] {
- // continue
- // } else {
- // fileMap[files[0]] = true
- // }
- // } else {
- // if strings.HasSuffix(val.Key, "/") {
- // isDir = true
- // fileName = val.Key[prefixLen : len(val.Key)-1]
- // relativePath += val.Key[prefixLen:]
- // } else {
- // isDir = false
- // fileName = val.Key[prefixLen:]
- // }
- // fileMap[fileName] = true
- // }
-
fileInfo := FileInfo{
ModTime: val.LastModified.Local().Format("2006-01-02 15:04:05"),
FileName: fileName,
diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini
index cb0ef205f..0fecf7de2 100755
--- a/options/locale/locale_en-US.ini
+++ b/options/locale/locale_en-US.ini
@@ -252,11 +252,12 @@ page_dev_env_desc2_title=Model Management and Sharing
page_dev_env_desc2_desc=Associate the model with the code version, you can adjust the model in different ways based on the historical version of the code and save the results. The trained model can be open and shared, so that more people can use the model to test and give feedback.
page_dev_env_desc3_title=Once Configuration, Multiple Reuse
page_dev_env_desc3_desc=Provide execution environment sharing, Once Configuration, Multiple Reuse. Lower the threshold of model development, and avoid spending repetitive time configuring complex environments.
-page_dev_yunlao=PengCheng Cloudbrain Open Source Collaboration
-page_dev_yunlao_desc1=The platform has been connected with Pengcheng Cloudbrain and can use the rich computing resources of Pengcheng Cloudbrain to complete AI development tasks.
-page_dev_yunlao_desc2=Pengcheng Cloudbrain's existing AI computing power is 100p FLOPS@FP16 (billions of half precision floating-point calculations per second), the main hardware infrastructure is composed of GPU server equipped with NVIDIA Tesla V100 and Atlas 900 AI cluster equipped with Kunpeng and Ascend processors.
-page_dev_yunlao_desc3=Developers can freely choose the corresponding computing resources according to their needs, and can test the adaptability, performance, stability of the model in different hardware environments.
-page_dev_yunlao_desc4=If your model requires more computing resources, you can also apply for it separately.
+page_dev_yunlao=OpenI AI Collaboration Platform
+page_dev_yunlao_desc1=The OpenI AI collaboration platform has been connected with Pengcheng Cloudbrain and phase I of the China Computing NET (C²NET), so the rich computing resources of Pengcheng Cloudbrain and the China Computing NET can be used to complete AI development tasks.
+page_dev_yunlao_desc2=Pengcheng Cloudbrain's existing AI computing power is 100P FLOPS@FP16 (100 quadrillion half-precision floating-point operations per second), and its main hardware infrastructure consists of GPU servers equipped with NVIDIA Tesla V100 and A100, and Atlas 900 AI clusters equipped with Kunpeng and Ascend processors.
+page_dev_yunlao_desc3=Phase I of the China Computing NET (C²NET) provides high-speed network interconnection between different artificial intelligence computing centers, enabling reasonable scheduling of computing power and flexible allocation of resources. At present, 11 intelligent computing centers have been connected, and the total scale of computing power is 1924P OPS@FP16. The OpenI AI collaboration platform has been connected to Pengcheng Cloud Computing Institute, Chengdu Intelligent Computing Center, Zhongyuan Intelligent Computing Center, Hefei brain and other nodes among them.
+page_dev_yunlao_desc4=Developers can freely select the corresponding computing resources according to the use needs, and can test the adaptability, performance, stability, etc. of the model in different hardware environments.
+page_dev_yunlao_desc5=If your model requires more computing resources, you can also apply for it separately.
page_dev_yunlao_apply=Apply Separately
search=Search
@@ -1073,6 +1074,7 @@ cloudbrain_operate = Operate
cloudbrain_status_createtime = Status/Createtime
cloudbrain_status_runtime = Running Time
cloudbrain_jobname_err=Name must start with a lowercase letter or number,can include lowercase letter,number,_ and -,can not end with _, and can be up to 36 characters long.
+cloudbrain_bootfile_err=The bootfile does not exist in the repository
cloudbrain_query_fail=Failed to query cloudbrain information.
cloudbrain.mirror_tag = Mirror Tag
cloudbrain.mirror_description = Mirror Description
@@ -3085,6 +3087,8 @@ Platform_Tutorial = Tutorial
foot.advice_feedback = Feedback
[cloudbrain]
+all_resource_cluster=All Clusters
+all_ai_center=All Computing NET
resource_cluster = Resource Cluster
resource_cluster_openi = OpenI Resource Cluster
resource_cluster_c2net = China Computing NET
@@ -3116,7 +3120,8 @@ dataset_path_rule = The dataset location is stored in the environment variable d
view_sample = View sample
inference_output_path_rule = The inference output path is stored in the environment variable result_url.
model_file_path_rule=The model file location is stored in the environment variable ckpt_url
-
+model_file_postfix_rule = The supported format of the model file is [ckpt, pb, h5, json, pkl, pth, t7, pdparams, onnx, pbtxt, keras, mlmodel, cfg, pt]
+model_convert_postfix_rule = The supported format of the model file is [.pth, .pkl, .onnx, .mindir, .ckpt, .pb]
delete_task = Delete task
task_delete_confirm = Are you sure you want to delete this task? Once this task is deleted, it cannot be recovered.
operate_confirm = confirm
@@ -3141,5 +3146,6 @@ Not_Stopped=The job is not stopped, can not be deleted.
Already_stopped=The job is already stopped.
Stopped_failed=Fail to stop the job, please try again later.
Stopped_success_update_status_fail=Succeed in stopping th job, but failed to update the job status and duration time.
+load_code_failed=Fail to load code, please check if the right branch is selected.
error.dataset_select = dataset select error:the count exceed the limit or has same name
diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini
index 652371690..2878627e2 100755
--- a/options/locale/locale_zh-CN.ini
+++ b/options/locale/locale_zh-CN.ini
@@ -254,11 +254,12 @@ page_dev_env_desc2_title=模型管理与共享
page_dev_env_desc2_desc=将模型与代码版本建立关联,可以基于代码历史版本,使用不同的方式调整模型,并将结果保存下来;训练好的模型可以开放共享,让更多人的使用模型测试并提出反馈
page_dev_env_desc3_title=一次配置,多次使用
page_dev_env_desc3_desc=提供执行环境共享,一次配置,多次使用,降低模型开发门槛,避免花费重复的时间配置复杂的环境
-page_dev_yunlao=鹏城云脑开源协同
-page_dev_yunlao_desc1=平台已经与鹏城云脑打通,可以利用鹏城云脑的丰富算力资源,完成AI开发任务
-page_dev_yunlao_desc2=鹏城云脑现有AI算力100P FLOPS@FP16(每秒十亿亿次半精度浮点计算),主要硬件基础设施由搭载英伟达Tesla V100 的GPU服务器和搭载鲲鹏、昇腾处理器的Atlas 900 AI集群构成
-page_dev_yunlao_desc3=开发者可以根据使用需求,自由选择相应计算资源,可以测试模型在不同硬件环境下的适配能力、性能、稳定性等
-page_dev_yunlao_desc4=如果您的模型需要更多的计算资源,也可以单独申请
+page_dev_yunlao=启智AI协作平台
+page_dev_yunlao_desc1=启智AI协作平台已经与鹏城云脑、中国算力网(C²NET)一期打通,可以利用鹏城云脑和中国算力网的丰富算力资源,完成AI开发任务。
+page_dev_yunlao_desc2=鹏城云脑现有AI算力100P FLOPS@FP16(每秒十亿亿次半精度浮点计算),主要硬件基础设施由搭载英伟达Tesla V100 和A100 的GPU服务器,以及搭载鲲鹏、昇腾处理器的Atlas 900 AI集群构成。
+page_dev_yunlao_desc3=中国算力网(C²NET)一期可实现不同人工智能计算中心之间高速网络互联,实现算力合理调度和资源弹性分配。目前已接入11家智算中心,算力总规模1924P OPS@FP16。启智AI协作平台已接入其中的鹏城云计算所、成都智算中心、中原智算中心、合肥类脑等节点。
+page_dev_yunlao_desc4=开发者可以根据使用需求,自由选择相应计算资源,可以测试模型在不同硬件环境下的适配能力、性能、稳定性等。
+page_dev_yunlao_desc5=如果您的模型需要更多的计算资源,也可以单独申请。
page_dev_yunlao_apply=单独申请
search=搜索
@@ -1076,6 +1077,7 @@ cloudbrain_operate=操作
cloudbrain_status_createtime=状态/创建时间
cloudbrain_status_runtime = 运行时长
cloudbrain_jobname_err=只能以小写字母或数字开头且只包含小写字母、数字、_和-,不能以_结尾,最长36个字符。
+cloudbrain_bootfile_err=仓库中不存在启动文件
cloudbrain_query_fail=查询云脑任务失败。
cloudbrain.mirror_tag = 镜像标签
cloudbrain.mirror_description = 镜像描述
@@ -3099,6 +3101,8 @@ Platform_Tutorial=新手指引
foot.advice_feedback = 意见反馈
[cloudbrain]
+all_resource_cluster=全部集群
+all_ai_center=全部智算中心
resource_cluster = 算力集群
resource_cluster_openi = 启智集群
resource_cluster_c2net = 智算网络集群
@@ -3131,7 +3135,8 @@ dataset_path_rule = 数据集位置存储在环境变量data_url中,训练输
view_sample = 查看样例
inference_output_path_rule = 推理输出路径存储在环境变量result_url中。
model_file_path_rule = 模型文件位置存储在环境变量ckpt_url中。
-
+model_file_postfix_rule = 模型文件支持的格式为 [ckpt, pb, h5, json, pkl, pth, t7, pdparams, onnx, pbtxt, keras, mlmodel, cfg, pt]
+model_convert_postfix_rule = 模型文件支持的格式为 [.pth, .pkl, .onnx, .mindir, .ckpt, .pb]
delete_task = 删除任务
task_delete_confirm = 你确认删除该任务么?此任务一旦删除不可恢复。
operate_confirm = 确定操作
@@ -3156,6 +3161,7 @@ Not_Stopped=任务还未终止,不能删除。
Already_stopped=任务已停止。
Stopped_failed=任务停止失败,请稍后再试。
Stopped_success_update_status_fail=任务停止成功,状态及运行时间更新失败。
+load_code_failed=代码加载失败,请确认选择了正确的分支。
error.dataset_select = 数据集选择错误:数量超过限制或者有同名数据集
diff --git a/routers/admin/cloudbrains.go b/routers/admin/cloudbrains.go
index 5876baf18..ec0034f4f 100755
--- a/routers/admin/cloudbrains.go
+++ b/routers/admin/cloudbrains.go
@@ -35,10 +35,14 @@ func CloudBrains(ctx *context.Context) {
listType := ctx.Query("listType")
jobType := ctx.Query("jobType")
jobStatus := ctx.Query("jobStatus")
+ aiCenter := ctx.Query("aiCenter")
+ cluster := ctx.Query("cluster")
ctx.Data["ListType"] = listType
ctx.Data["JobType"] = jobType
ctx.Data["JobStatus"] = jobStatus
+ ctx.Data["aiCenter"] = aiCenter
+ ctx.Data["cluster"] = cluster
page := ctx.QueryInt("page")
if page <= 0 {
@@ -80,6 +84,8 @@ func CloudBrains(ctx *context.Context) {
IsLatestVersion: modelarts.IsLatestVersion,
ComputeResource: listType,
Type: models.TypeCloudBrainAll,
+ AiCenter: aiCenter,
+ Cluster: cluster,
})
if err != nil {
ctx.ServerError("Get job failed:", err)
diff --git a/routers/api/v1/repo/cloudbrain_dashboard.go b/routers/api/v1/repo/cloudbrain_dashboard.go
index c632f3c8b..52ee3ed2c 100755
--- a/routers/api/v1/repo/cloudbrain_dashboard.go
+++ b/routers/api/v1/repo/cloudbrain_dashboard.go
@@ -679,6 +679,8 @@ func GetCloudbrainsDetailData(ctx *context.Context) {
jobType := ctx.Query("jobType")
jobStatus := ctx.Query("jobStatus")
cloudBrainType := ctx.QueryInt("Type")
+ aiCenter := ctx.Query("aiCenter")
+ needDeleteInfo := ctx.Query("needDeleteInfo")
page := ctx.QueryInt("page")
pageSize := ctx.QueryInt("pagesize")
@@ -724,6 +726,8 @@ func GetCloudbrainsDetailData(ctx *context.Context) {
NeedRepoInfo: true,
BeginTimeUnix: int64(recordBeginTime),
EndTimeUnix: endTime.Unix(),
+ AiCenter: aiCenter,
+ NeedDeleteInfo: needDeleteInfo,
})
if err != nil {
ctx.ServerError("Get job failed:", err)
diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go
index 7d30614b5..419c6d6a4 100755
--- a/routers/api/v1/repo/modelarts.go
+++ b/routers/api/v1/repo/modelarts.go
@@ -26,40 +26,6 @@ import (
routerRepo "code.gitea.io/gitea/routers/repo"
)
-func GetModelArtsNotebook(ctx *context.APIContext) {
- var (
- err error
- )
-
- jobID := ctx.Params(":jobid")
- repoID := ctx.Repo.Repository.ID
- job, err := models.GetRepoCloudBrainByJobID(repoID, jobID)
- if err != nil {
- ctx.NotFound(err)
- return
- }
- result, err := modelarts.GetJob(jobID)
- if err != nil {
- ctx.NotFound(err)
- return
- }
- oldStatus := job.Status
- job.Status = result.Status
- if oldStatus != result.Status {
- notification.NotifyChangeCloudbrainStatus(job, oldStatus)
- }
- err = models.UpdateJob(job)
- if err != nil {
- log.Error("UpdateJob failed:", err)
- }
-
- ctx.JSON(http.StatusOK, map[string]interface{}{
- "JobID": jobID,
- "JobStatus": result.Status,
- })
-
-}
-
func GetModelArtsNotebook2(ctx *context.APIContext) {
var (
err error
@@ -71,33 +37,16 @@ func GetModelArtsNotebook2(ctx *context.APIContext) {
ctx.NotFound(err)
return
}
- result, err := modelarts.GetNotebook2(job.JobID)
+ err = modelarts.HandleNotebookInfo(job)
if err != nil {
ctx.NotFound(err)
return
}
- if job.StartTime == 0 && result.Lease.UpdateTime > 0 {
- job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
- }
- oldStatus := job.Status
- job.Status = result.Status
- if job.EndTime == 0 && models.IsModelArtsDebugJobTerminal(job.Status) {
- job.EndTime = timeutil.TimeStampNow()
- }
- job.CorrectCreateUnix()
- job.ComputeAndSetDuration()
- if oldStatus != result.Status {
- notification.NotifyChangeCloudbrainStatus(job, oldStatus)
- }
- err = models.UpdateJob(job)
- if err != nil {
- log.Error("UpdateJob failed:", err)
- }
ctx.JSON(http.StatusOK, map[string]interface{}{
"ID": ID,
"JobName": job.JobName,
- "JobStatus": result.Status,
+ "JobStatus": job.Status,
"JobDuration": job.TrainJobDuration,
})
@@ -189,27 +138,11 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
}
}
} else if job.Type == models.TypeCloudBrainTwo {
- result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
+ err := modelarts.HandleTrainJobInfo(job)
if err != nil {
ctx.NotFound(err)
return
}
-
- if job.StartTime == 0 && result.StartTime > 0 {
- job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
- }
- job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
- job.Duration = result.Duration / 1000
- job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
-
- if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
- job.EndTime = job.StartTime.Add(job.Duration)
- }
- job.CorrectCreateUnix()
- err = models.UpdateTrainJobVersion(job)
- if err != nil {
- log.Error("UpdateJob failed:", err)
- }
} else if job.Type == models.TypeC2Net {
result, err := grampus.GetJob(jobID)
if err != nil {
@@ -221,6 +154,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
if job.StartTime == 0 && result.JobInfo.StartedAt > 0 {
job.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
}
+ oldStatus := job.Status
job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
job.Duration = result.JobInfo.RunSec
job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
@@ -243,6 +177,9 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
aiCenterName = temp[1]
}
}
+ if oldStatus != job.Status {
+ notification.NotifyChangeCloudbrainStatus(job, oldStatus)
+ }
err = models.UpdateTrainJobVersion(job)
if err != nil {
log.Error("UpdateJob failed:", err)
@@ -558,26 +495,11 @@ func GetModelArtsInferenceJob(ctx *context.APIContext) {
ctx.NotFound(err)
return
}
- result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
+ err = modelarts.HandleTrainJobInfo(job)
if err != nil {
ctx.NotFound(err)
return
}
- if job.StartTime == 0 && result.StartTime > 0 {
- job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
- }
- job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
- job.Duration = result.Duration / 1000
- job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
-
- if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
- job.EndTime = job.StartTime.Add(job.Duration)
- }
- job.CorrectCreateUnix()
- err = models.UpdateInferenceJob(job)
- if err != nil {
- log.Error("UpdateJob failed:", err)
- }
ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,
diff --git a/routers/home.go b/routers/home.go
index 85057c3a1..1a697946a 100755
--- a/routers/home.go
+++ b/routers/home.go
@@ -40,7 +40,7 @@ const (
tplExploreImages base.TplName = "explore/images"
tplExploreExploreDataAnalysis base.TplName = "explore/data_analysis"
tplHomeTerm base.TplName = "terms"
- tplHomePrivacy base.TplName = "privacy"
+ tplHomePrivacy base.TplName = "privacy"
)
// Home render home page
@@ -93,6 +93,7 @@ func setRecommendURL(ctx *context.Context) {
ctx.Data["page_dev_yunlao_desc2"] = ctx.Tr("home.page_dev_yunlao_desc2")
ctx.Data["page_dev_yunlao_desc3"] = ctx.Tr("home.page_dev_yunlao_desc3")
ctx.Data["page_dev_yunlao_desc4"] = ctx.Tr("home.page_dev_yunlao_desc4")
+ ctx.Data["page_dev_yunlao_desc5"] = ctx.Tr("home.page_dev_yunlao_desc5")
ctx.Data["page_dev_yunlao_apply"] = ctx.Tr("home.page_dev_yunlao_apply")
ctx.Data["page_recommend_activity"] = ctx.Tr("home.page_recommend_activity")
ctx.Data["page_recommend_activity_desc"] = ctx.Tr("home.page_recommend_activity_desc")
diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go
index 32c5ea9f6..c3c6c43cb 100755
--- a/routers/repo/cloudbrain.go
+++ b/routers/repo/cloudbrain.go
@@ -239,6 +239,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := form.BranchName
+ bootFile := strings.TrimSpace(form.BootFile)
repo := ctx.Repo.Repository
tpl := tplCloudBrainNew
@@ -305,6 +306,13 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
command := cloudbrain.GetCloudbrainDebugCommand()
if jobType == string(models.JobTypeTrain) {
+ bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName)
+ if err != nil || !bootFileExist {
+ log.Error("Get bootfile error:", err, ctx.Data["MsgID"])
+ cloudBrainNewDataPrepare(ctx)
+ ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form)
+ return
+ }
tpl = tplCloudBrainTrainJobNew
commandTrain, err := getTrainJobCommand(form)
if err != nil {
@@ -328,12 +336,12 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
if branchName == "" {
branchName = cloudbrain.DefaultBranchName
}
- downloadCode(repo, codePath, branchName)
- uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/")
-
- modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/"
- mkModelPath(modelPath)
- uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/")
+ errStr = loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath)
+ if errStr != "" {
+ cloudBrainNewDataPrepare(ctx)
+ ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form)
+ return
+ }
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)
@@ -378,6 +386,30 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
}
}
+// loadCodeAndMakeModelPath runs downloadCode for branchName into codePath, uploads codePath with
+// uploadCodeToMinio, then creates setting.JobPath+jobName+resultPath via mkModelPath and uploads it.
+// It returns "" on success, or the locale key "cloudbrain.load_code_failed" as soon as a step fails.
+func loadCodeAndMakeModelPath(repo *models.Repository, codePath string, branchName string, jobName string, resultPath string) string {
+	const failedKey = "cloudbrain.load_code_failed"
+
+	if err := downloadCode(repo, codePath, branchName); err != nil {
+		return failedKey
+	}
+	if err := uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
+		return failedKey
+	}
+
+	outputDir := setting.JobPath + jobName + resultPath + "/"
+	if err := mkModelPath(outputDir); err != nil {
+		return failedKey
+	}
+	if err := uploadCodeToMinio(outputDir, jobName, resultPath+"/"); err != nil {
+		return failedKey
+	}
+
+	return ""
+}
+
func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBrainInferencForm) {
ctx.Data["PageIsCloudBrain"] = true
displayJobName := form.DisplayJobName
@@ -389,6 +421,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := form.BranchName
+ bootFile := strings.TrimSpace(form.BootFile)
labelName := form.LabelName
repo := ctx.Repo.Repository
@@ -426,6 +459,14 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
return
}
+ bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName)
+ if err != nil || !bootFileExist {
+ log.Error("Get bootfile error:", err, ctx.Data["MsgID"])
+ cloudBrainNewDataPrepare(ctx)
+ ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form)
+ return
+ }
+
count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType)
if err != nil {
log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -444,11 +485,12 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
if branchName == "" {
branchName = cloudbrain.DefaultBranchName
}
- downloadCode(repo, codePath, branchName)
- uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/")
- resultPath := setting.JobPath + jobName + cloudbrain.ResultPath + "/"
- mkResultPath(resultPath)
- uploadCodeToMinio(resultPath, jobName, cloudbrain.ResultPath+"/")
+ errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ResultPath)
+ if errStr != "" {
+ cloudBrainNewDataPrepare(ctx)
+ ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form)
+ return
+ }
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)
@@ -886,7 +928,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo
}
}
-
+ ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, false)
ctx.Data["task"] = task
labelName := strings.Fields(task.LabelName)
ctx.Data["LabelName"] = labelName
@@ -1664,11 +1706,7 @@ func uploadCodeToMinio(codePath, jobName, parentDir string) error {
}
func mkModelPath(modelPath string) error {
- return mkPathAndReadMeFile(modelPath, "You can put the model file into this directory and download it by the web page.")
-}
-
-func mkResultPath(resultPath string) error {
- return mkPathAndReadMeFile(resultPath, "You can put the result file into this directory and download it by the web page.")
+ return mkPathAndReadMeFile(modelPath, "You can put the files into this directory and download the files by the web page.")
}
func mkPathAndReadMeFile(path string, text string) error {
@@ -1790,70 +1828,24 @@ func SyncCloudbrainStatus() {
}
} else if task.Type == models.TypeCloudBrainTwo {
if task.JobType == string(models.JobTypeDebug) {
- //result, err := modelarts.GetJob(task.JobID)
- result, err := modelarts.GetNotebook2(task.JobID)
+ err := modelarts.HandleNotebookInfo(task)
if err != nil {
- log.Error("GetJob(%s) failed:%v", task.JobName, err)
+ log.Error("HandleNotebookInfo(%s) failed:%v", task.DisplayJobName, err)
continue
}
-
- if result != nil {
- oldStatus := task.Status
- task.Status = result.Status
- if task.StartTime == 0 && result.Lease.UpdateTime > 0 {
- task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
- }
- if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
- task.EndTime = timeutil.TimeStampNow()
- }
- task.CorrectCreateUnix()
- task.ComputeAndSetDuration()
- if oldStatus != task.Status {
- notification.NotifyChangeCloudbrainStatus(task, oldStatus)
- }
- err = models.UpdateJob(task)
- if err != nil {
- log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
- continue
- }
- }
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) {
- result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
+ err := modelarts.HandleTrainJobInfo(task)
if err != nil {
- log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
+ log.Error("HandleTrainJobInfo(%s) failed:%v", task.DisplayJobName, err)
continue
}
-
- if result != nil {
- oldStatus := task.Status
- task.Status = modelarts.TransTrainJobStatus(result.IntStatus)
- task.Duration = result.Duration / 1000
- task.TrainJobDuration = result.TrainJobDuration
-
- if task.StartTime == 0 && result.StartTime > 0 {
- task.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
- }
- task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
- if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
- task.EndTime = task.StartTime.Add(task.Duration)
- }
- task.CorrectCreateUnix()
- if oldStatus != task.Status {
- notification.NotifyChangeCloudbrainStatus(task, oldStatus)
- }
- err = models.UpdateJob(task)
- if err != nil {
- log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
- continue
- }
- }
} else {
- log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType)
+ log.Error("task.JobType(%s) is error:%s", task.DisplayJobName, task.JobType)
}
} else if task.Type == models.TypeC2Net {
result, err := grampus.GetJob(task.JobID)
if err != nil {
- log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
+ log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err)
continue
}
diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go
index 9a1f9591c..cdde7596c 100755
--- a/routers/repo/grampus.go
+++ b/routers/repo/grampus.go
@@ -3,6 +3,7 @@ package repo
import (
"encoding/json"
"errors"
+ "fmt"
"io/ioutil"
"net/http"
"os"
@@ -45,8 +46,7 @@ func GrampusTrainJobGPUNew(ctx *context.Context) {
ctx.ServerError("get new train-job info failed", err)
return
}
- waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain)
- ctx.Data["WaitCount"] = waitCount
+
ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUNew)
}
@@ -57,8 +57,6 @@ func GrampusTrainJobNPUNew(ctx *context.Context) {
ctx.ServerError("get new train-job info failed", err)
return
}
- waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain)
- ctx.Data["WaitCount"] = waitCount
ctx.HTML(200, tplGrampusTrainJobNPUNew)
}
@@ -131,8 +129,12 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err
if processType == grampus.ProcessorTypeGPU {
ctx.Data["datasetType"] = models.TypeCloudBrainOne
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain)
+ ctx.Data["WaitCount"] = waitCount
} else if processType == grampus.ProcessorTypeNPU {
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain)
+ ctx.Data["WaitCount"] = waitCount
}
return nil
@@ -213,6 +215,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
return
}
+	bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) // reject the job early when the boot file is not on the branch
+	if err != nil || !bootFileExist {
+		log.Error("Get bootfile error:%v", err, ctx.Data["MsgID"]) // err is nil when the file is simply absent
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobGPUNew, &form)
+		return
+	}
+
errStr := checkSpecialPool(ctx, "GPU")
if errStr != "" {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
@@ -280,7 +290,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
- ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
return
}
@@ -289,7 +299,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
- ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
return
}
@@ -297,7 +307,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := mkModelPath(modelPath); err != nil {
log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
- ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
return
}
@@ -305,7 +315,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
- ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form)
return
}
@@ -338,7 +348,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
EngineName: image,
DatasetName: attachment.Name,
IsLatestVersion: modelarts.IsLatestVersion,
- VersionCount: modelarts.VersionCount,
+ VersionCount: modelarts.VersionCountOne,
WorkServerNumber: 1,
}
@@ -388,7 +398,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
branchName := form.BranchName
isLatestVersion := modelarts.IsLatestVersion
flavorName := form.FlavorName
- versionCount := modelarts.VersionCount
+ versionCount := modelarts.VersionCountOne
engineName := form.EngineName
if !jobNamePattern.MatchString(displayJobName) {
@@ -397,6 +407,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
return
}
+	bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) // reject the job early when the boot file is not on the branch
+	if err != nil || !bootFileExist {
+		log.Error("Get bootfile error:%v", err, ctx.Data["MsgID"]) // err is nil when the file is simply absent
+		grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobNPUNew, &form)
+		return
+	}
+
errStr := checkSpecialPool(ctx, "NPU")
if errStr != "" {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
@@ -464,22 +482,22 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
- ctx.RenderWithErr("Create task failed, server timed out", tplGrampusTrainJobNPUNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form)
return
}
//todo: upload code (send to file_server todo this work?)
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
- grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
- ctx.RenderWithErr("Failed to obsMkdir_output", tplGrampusTrainJobNPUNew, &form)
+ grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form)
return
}
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
- ctx.RenderWithErr("Failed to uploadCodeToObs", tplGrampusTrainJobNPUNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form)
return
}
@@ -695,7 +713,7 @@ func GrampusTrainJobShow(ctx *context.Context) {
taskList := make([]*models.Cloudbrain, 0)
taskList = append(taskList, task)
ctx.Data["version_list_task"] = taskList
-
+ ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, false)
ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task)
ctx.Data["displayJobName"] = task.DisplayJobName
@@ -845,6 +863,9 @@ func downloadZipCode(ctx *context.Context, codePath, branchName string) error {
log.Error("GetBranchCommit failed:" + err.Error())
return err
}
+	} else {
+		log.Error("the branch does not exist: %s", branchName) // pass the name as an argument so a '%' in it is not parsed as a verb
+		return fmt.Errorf("the branch %q does not exist", branchName)
}
archivePath = path.Join(archivePath, grampus.CodeArchiveName)
diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go
index 43f4a6e73..763308930 100755
--- a/routers/repo/modelarts.go
+++ b/routers/repo/modelarts.go
@@ -15,9 +15,6 @@ import (
"time"
"unicode/utf8"
- "code.gitea.io/gitea/modules/notification"
- "code.gitea.io/gitea/modules/timeutil"
-
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/base"
@@ -26,9 +23,11 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
+ "code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/obs"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
+ "code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
)
@@ -119,8 +118,7 @@ func MustEnableModelArts(ctx *context.Context) {
func NotebookNew(ctx *context.Context) {
notebookNewDataPrepare(ctx)
- waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
- ctx.Data["WaitCount"] = waitCount
+
ctx.HTML(200, tplModelArtsNotebookNew)
}
@@ -150,6 +148,9 @@ func notebookNewDataPrepare(ctx *context.Context) error {
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
+ ctx.Data["WaitCount"] = waitCount
+
return nil
}
@@ -270,66 +271,21 @@ func NotebookShow(ctx *context.Context) {
return
}
- result, err := modelarts.GetNotebook2(task.JobID)
- if err != nil {
- log.Error("GET job error", err.Error())
- ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
- return
- }
-
- if result != nil {
- if task.DeletedAt.IsZero() { //normal record
- if task.Status != result.Status {
- oldStatus := task.Status
- task.Status = result.Status
- models.ParseAndSetDurationFromModelArtsNotebook(result, task)
- notification.NotifyChangeCloudbrainStatus(task, oldStatus)
- err = models.UpdateJob(task)
- if err != nil {
- log.Error("GET job error", err.Error())
- ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
- return
- }
- }
- } else { //deleted record
-
- }
- if task.FlavorCode == "" {
- task.FlavorCode = result.Flavor
+ if task.DeletedAt.IsZero() { //normal record
+ err := modelarts.HandleNotebookInfo(task)
+ if err != nil {
+ ctx.Data["error"] = err.Error()
+ ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
+ return
}
+ } else { //deleted record
+
}
datasetDownload := make([]models.DatasetDownload, 0)
if ctx.IsSigned {
if task.Uuid != "" && task.UserID == ctx.User.ID {
- uuidList := strings.Split(task.Uuid, ";")
- for _, uuidStr := range uuidList {
- attachment, err := models.GetAttachmentByUUID(uuidStr)
- if err != nil {
- log.Error("GetAttachmentByUUID failed:%v", err.Error())
- return
- }
- dataset, err := models.GetDatasetByID(attachment.DatasetID)
- if err != nil {
- log.Error("GetDatasetByID failed:%v", err.Error())
- return
- }
- repo, err := models.GetRepositoryByID(dataset.RepoID)
- if err != nil {
- log.Error("GetRepositoryByID failed:%v", err.Error())
- return
- }
- datasetDownload = append(datasetDownload, models.DatasetDownload{
- DatasetName: attachment.Name,
- DatasetDownloadLink: attachment.S3DownloadURL(),
- RepositoryLink: repo.Link() + "/datasets",
- })
-
- }
- // datasetName, err := GetDatasetNameByUUID(task.Uuid)
- // if err == nil {
- // task.DatasetName = datasetName
- // }
+ datasetDownload = GetCloudBrainDataSetInfo(task.Uuid, true)
}
}
user, err := models.GetUserByID(task.UserID)
@@ -375,6 +331,39 @@ func NotebookShow(ctx *context.Context) {
ctx.HTML(200, tplModelArtsNotebookShow)
}
+// GetCloudBrainDataSetInfo resolves each ";"-separated attachment UUID in uuid into a DatasetDownload entry.
+// Download links are filled only when isNeedDown is true; on a lookup error the entries built so far are returned.
+func GetCloudBrainDataSetInfo(uuid string, isNeedDown bool) []models.DatasetDownload {
+	datasetDownload := make([]models.DatasetDownload, 0)
+	for _, uuidStr := range strings.FieldsFunc(uuid, func(r rune) bool { return r == ';' }) { // FieldsFunc drops empty segments, so an empty uuid does no lookups
+		attachment, err := models.GetAttachmentByUUID(uuidStr)
+		if err != nil {
+			log.Error("GetAttachmentByUUID failed:%v", err.Error())
+			return datasetDownload
+		}
+		dataset, err := models.GetDatasetByID(attachment.DatasetID)
+		if err != nil {
+			log.Error("GetDatasetByID failed:%v", err.Error())
+			return datasetDownload
+		}
+		repo, err := models.GetRepositoryByID(dataset.RepoID)
+		if err != nil {
+			log.Error("GetRepositoryByID failed:%v", err.Error())
+			return datasetDownload
+		}
+		url := ""
+		if isNeedDown {
+			url = attachment.S3DownloadURL()
+		}
+		datasetDownload = append(datasetDownload, models.DatasetDownload{
+			DatasetName:         attachment.Name,
+			DatasetDownloadLink: url,
+			RepositoryLink:      repo.Link() + "/datasets",
+		})
+	}
+	return datasetDownload
+}
+
func setShowSpecBySpecialPoolConfig(ctx *context.Context, findSpec bool, task *models.Cloudbrain) {
modelarts.InitSpecialPool()
if modelarts.SpecialPools != nil && !findSpec {
@@ -427,82 +416,127 @@ func NotebookDebug2(ctx *context.Context) {
ctx.Redirect(result.Url + "?token=" + result.Token)
}
-func NotebookManage(ctx *context.Context) {
- var ID = ctx.Params(":id")
- var action = ctx.Params(":action")
- var resultCode = "0"
+func NotebookRestart(ctx *context.Context) {
+ var id = ctx.Params(":id")
+ var resultCode = "-1"
var errorMsg = ""
var status = ""
+ task := ctx.Cloudbrain
+
for {
- task, err := models.GetCloudbrainByID(ID)
- if err != nil {
- log.Error("get task(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "system error"
+ ctx.CheckWechatBind()
+ if ctx.Written() {
+ return
+ }
+ if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) {
+ log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
+ errorMsg = "the job is not stopped"
break
}
- if action == models.ActionStop {
- if task.Status != string(models.ModelArtsRunning) {
- log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "the job is not running"
- break
- }
-
- if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) {
- log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "you have no right to stop the job"
- break
- }
- } else if action == models.ActionRestart {
- ctx.CheckWechatBind()
- if ctx.Written() {
- return
- }
- if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) {
- log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "the job is not stopped"
+ count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
+ if err != nil {
+ log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
+ errorMsg = "system error"
+ break
+ } else {
+ if count >= 1 {
+ log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
+ errorMsg = "you have already a running or waiting task, can not create more"
break
}
+ }
- if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin()) {
- log.Error("the user has no right ro restart the job", task.JobName, ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "you have no right to restart the job"
- break
- }
+ createTime := timeutil.TimeStampNow()
+ param := models.NotebookAction{
+ Action: models.ActionStart,
+ }
- count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
- if err != nil {
- log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "system error"
- break
- } else {
- if count >= 1 {
- log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "you have already a running or waiting task, can not create more"
- break
+ res, err := modelarts.ManageNotebook2(task.JobID, param)
+ if err != nil {
+ log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"])
+			/* The 502 case on re-debug is not handled for now; see the design proposal for details.
+			if strings.HasPrefix(err.Error(), modelarts.UnknownErrorPrefix) {
+				log.Info("(%s)unknown error, set temp status", task.DisplayJobName)
+				errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+					JobID: task.JobID,
+					VersionID: models.TempVersionId,
+					Status: models.TempJobStatus,
+					Type: task.Type,
+					JobName: task.JobName,
+					JobType: task.JobType,
+				})
+				if errTemp != nil {
+					log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+				}
+			}
+			*/
+ errorMsg = err.Error()
+ break
+ }
- action = models.ActionStart
- } else {
- log.Error("the action(%s) is illegal", action, ctx.Data["MsgID"])
+ newTask := &models.Cloudbrain{
+ Status: res.Status,
+ UserID: task.UserID,
+ RepoID: task.RepoID,
+ JobID: task.JobID,
+ JobName: task.JobName,
+ DisplayJobName: task.DisplayJobName,
+ JobType: task.JobType,
+ Type: task.Type,
+ Uuid: task.Uuid,
+ Image: task.Image,
+ ComputeResource: task.ComputeResource,
+ Description: task.Description,
+ CreatedUnix: createTime,
+ UpdatedUnix: createTime,
+ FlavorCode: task.FlavorCode,
+ FlavorName: task.FlavorName,
+ }
+
+ err = models.RestartCloudbrain(task, newTask)
+ if err != nil {
+ log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
+ errorMsg = "system error"
+ break
+ }
+
+		id = strconv.FormatInt(newTask.ID, 10) // report the new record's ID, as the pre-split handler did
+		status = res.Status
+		resultCode = "0"
+		notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, id, newTask.DisplayJobName, models.ActionCreateDebugNPUTask)
+ break
+ }
+
+ ctx.JSON(200, map[string]string{
+ "result_code": resultCode,
+ "error_msg": errorMsg,
+ "status": status,
+ "id": id,
+ })
+}
+
+func NotebookStop(ctx *context.Context) {
+ var id = ctx.Params(":id")
+ var resultCode = "0"
+ var errorMsg = ""
+ var status = ""
+
+ task := ctx.Cloudbrain
+
+ for {
+ if task.Status != string(models.ModelArtsRunning) {
+ log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
- errorMsg = "非法操作"
+ errorMsg = "the job is not running"
break
}
param := models.NotebookAction{
- Action: action,
+ Action: models.ActionStop,
}
- createTime := timeutil.TimeStampNow()
+
res, err := modelarts.ManageNotebook2(task.JobID, param)
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
@@ -515,52 +549,21 @@ func NotebookManage(ctx *context.Context) {
}
status = res.Status
- if action == models.ActionStart {
- newTask := &models.Cloudbrain{
- Status: status,
- UserID: task.UserID,
- RepoID: task.RepoID,
- JobID: task.JobID,
- JobName: task.JobName,
- DisplayJobName: task.DisplayJobName,
- JobType: task.JobType,
- Type: task.Type,
- Uuid: task.Uuid,
- Image: task.Image,
- ComputeResource: task.ComputeResource,
- Description: task.Description,
- CreatedUnix: createTime,
- UpdatedUnix: createTime,
- FlavorCode: task.FlavorCode,
- FlavorName: task.FlavorName,
- }
-
- err = models.RestartCloudbrain(task, newTask)
- if err != nil {
- log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "system error"
- break
- }
- ID = strconv.FormatInt(newTask.ID, 10)
- notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask)
- } else {
- oldStatus := task.Status
- task.Status = res.Status
- if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
- task.EndTime = timeutil.TimeStampNow()
- }
- task.ComputeAndSetDuration()
- if oldStatus != task.Status {
- notification.NotifyChangeCloudbrainStatus(task, oldStatus)
- }
- err = models.UpdateJob(task)
- if err != nil {
- log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
- resultCode = "-1"
- errorMsg = "system error"
- break
- }
+ oldStatus := task.Status
+ task.Status = res.Status
+ if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
+ task.EndTime = timeutil.TimeStampNow()
+ }
+ task.ComputeAndSetDuration()
+ if oldStatus != task.Status {
+ notification.NotifyChangeCloudbrainStatus(task, oldStatus)
+ }
+ err = models.UpdateJob(task)
+ if err != nil {
+ log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
+ resultCode = "-1"
+ errorMsg = "system error"
+ break
}
break
@@ -570,7 +573,7 @@ func NotebookManage(ctx *context.Context) {
"result_code": resultCode,
"error_msg": errorMsg,
"status": status,
- "id": ID,
+ "id": id,
})
}
@@ -670,8 +673,6 @@ func TrainJobNew(ctx *context.Context) {
ctx.ServerError("get new train-job info failed", err)
return
}
- waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
- ctx.Data["WaitCount"] = waitCount
ctx.HTML(200, tplModelArtsTrainJobNew)
}
@@ -741,6 +742,8 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
}
ctx.Data["config_list"] = configList.ParaConfigs
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
+ ctx.Data["WaitCount"] = waitCount
return nil
}
@@ -857,6 +860,8 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts
ctx.Data["dataset_name"] = datasetNames
ctx.Data["branch_name"] = form.BranchName
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
+ ctx.Data["WaitCount"] = waitCount
return nil
}
@@ -868,8 +873,6 @@ func TrainJobNewVersion(ctx *context.Context) {
ctx.ServerError("get new train-job info failed", err)
return
}
- waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
- ctx.Data["WaitCount"] = waitCount
ctx.HTML(200, tplModelArtsTrainJobVersionNew)
}
@@ -962,6 +965,8 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
return err
}
ctx.Data["config_list"] = configList.ParaConfigs
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
+ ctx.Data["WaitCount"] = waitCount
return nil
}
@@ -1053,6 +1058,8 @@ func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrai
}
ctx.Data["config_list"] = configList.ParaConfigs
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
+ ctx.Data["WaitCount"] = waitCount
return nil
}
@@ -1077,10 +1084,10 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/"
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/"
// dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
- branch_name := form.BranchName
+ branchName := form.BranchName
isLatestVersion := modelarts.IsLatestVersion
FlavorName := form.FlavorName
- VersionCount := modelarts.VersionCount
+ VersionCount := modelarts.VersionCountOne
EngineName := form.EngineName
count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
@@ -1105,6 +1112,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
return
}
+	bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) // reject the job early when the boot file is not on the branch
+	if err != nil || !bootFileExist {
+		log.Error("Get bootfile error:%v", err) // err is nil when the file is simply absent
+		trainJobErrorNewDataPrepare(ctx, form)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobNew, &form)
+		return
+	}
+
errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
if errStr != "" {
trainJobErrorNewDataPrepare(ctx, form)
@@ -1136,12 +1151,12 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
}
gitRepo, _ := git.OpenRepository(repo.RepoPath())
- commitID, _ := gitRepo.GetBranchCommitID(branch_name)
+ commitID, _ := gitRepo.GetBranchCommitID(branchName)
- if err := downloadCode(repo, codeLocalPath, branch_name); err != nil {
+ if err := downloadCode(repo, codeLocalPath, branchName); err != nil {
log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err)
trainJobErrorNewDataPrepare(ctx, form)
- ctx.RenderWithErr("Create task failed, server timed out", tplModelArtsTrainJobNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobNew, &form)
return
}
@@ -1165,7 +1180,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
// if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
trainJobErrorNewDataPrepare(ctx, form)
- ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobNew, &form)
return
}
@@ -1280,7 +1295,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
Parameters: param,
CommitID: commitID,
IsLatestVersion: isLatestVersion,
- BranchName: branch_name,
+ BranchName: branchName,
Params: form.Params,
FlavorName: FlavorName,
EngineName: EngineName,
@@ -1382,7 +1397,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/"
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/"
// dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
- branch_name := form.BranchName
+ branchName := form.BranchName
PreVersionName := form.VersionName
FlavorName := form.FlavorName
EngineName := form.EngineName
@@ -1402,6 +1417,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
return
}
+	bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) // reject the job early when the boot file is not on the branch
+	if err != nil || !bootFileExist {
+		log.Error("Get bootfile error:%v", err) // err is nil when the file is simply absent
+		versionErrorDataPrepare(ctx, form)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobVersionNew, &form)
+		return
+	}
+
errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
if errStr != "" {
versionErrorDataPrepare(ctx, form)
@@ -1416,11 +1439,11 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
}
gitRepo, _ := git.OpenRepository(repo.RepoPath())
- commitID, _ := gitRepo.GetBranchCommitID(branch_name)
- if err := downloadCode(repo, codeLocalPath, branch_name); err != nil {
+ commitID, _ := gitRepo.GetBranchCommitID(branchName)
+ if err := downloadCode(repo, codeLocalPath, branchName); err != nil {
log.Error("Failed git clone repo to local(!: %s (%v)", repo.FullName(), err)
versionErrorDataPrepare(ctx, form)
- ctx.RenderWithErr("Failed git clone repo to local!", tplModelArtsTrainJobVersionNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobVersionNew, &form)
return
}
@@ -1445,7 +1468,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
versionErrorDataPrepare(ctx, form)
- ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobVersionNew, &form)
return
}
@@ -1570,7 +1593,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
Parameters: param,
PreVersionId: task.VersionID,
CommitID: commitID,
- BranchName: branch_name,
+ BranchName: branchName,
FlavorName: FlavorName,
EngineName: EngineName,
PreVersionName: PreVersionName,
@@ -1758,7 +1781,7 @@ func TrainJobShow(ctx *context.Context) {
return
}
ctx.Data["canNewJob"] = canNewJob
-
+ datasetList := make([][]models.DatasetDownload, 0)
//将运行参数转化为epoch_size = 3, device_target = Ascend的格式
for i, task := range VersionListTasks {
@@ -1781,7 +1804,7 @@ func TrainJobShow(ctx *context.Context) {
} else {
VersionListTasks[i].Parameters = ""
}
-
+ datasetList = append(datasetList, GetCloudBrainDataSetInfo(task.Uuid, false))
VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
}
@@ -1793,64 +1816,11 @@ func TrainJobShow(ctx *context.Context) {
ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName
ctx.Data["version_list_task"] = VersionListTasks
ctx.Data["version_list_count"] = VersionListCount
+ ctx.Data["datasetList"] = datasetList
ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, &VersionListTasks[0].Cloudbrain)
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
}
-func TrainJobGetLog(ctx *context.Context) {
- ctx.Data["PageIsTrainJob"] = true
-
- var jobID = ctx.Params(":jobid")
- var logFileName = ctx.Query("file_name")
- var baseLine = ctx.Query("base_line")
- var order = ctx.Query("order")
-
- if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
- log.Error("order(%s) check failed", order)
- ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
- return
- }
-
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
- if err != nil {
- log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
- ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
- return
- }
-
- ctx.Data["log"] = result
- //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
-}
-
-func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
- task, err := models.GetCloudbrainByJobID(jobID)
- if err != nil {
- log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
- return nil, nil, err
- }
-
- resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
- if err != nil {
- log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
- return nil, nil, err
- }
-
- result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
- if err != nil {
- log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
- return nil, nil, err
- }
-
- return resultLogFile, result, err
-}
-
func TrainJobDel(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
var listType = ctx.Query("listType")
@@ -1917,15 +1887,6 @@ func TrainJobStop(ctx *context.Context) {
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
}
-func canUserCreateTrainJob(uid int64) (bool, error) {
- org, err := models.GetOrgByName(setting.AllowedOrg)
- if err != nil {
- log.Error("get allowed org failed: ", setting.AllowedOrg)
- return false, err
- }
-
- return org.IsOrgMember(uid)
-}
func canUserCreateTrainJobVersion(ctx *context.Context, userID int64) (bool, error) {
if ctx == nil || ctx.User == nil {
log.Error("user unlogin!")
@@ -2012,12 +1973,12 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath + VersionOutputPath + "/"
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/"
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
- branch_name := form.BranchName
+ branchName := form.BranchName
FlavorName := form.FlavorName
EngineName := form.EngineName
LabelName := form.LabelName
isLatestVersion := modelarts.IsLatestVersion
- VersionCount := modelarts.VersionCount
+ VersionCount := modelarts.VersionCountOne
trainUrl := form.TrainUrl
modelName := form.ModelName
modelVersion := form.ModelVersion
@@ -2047,6 +2008,14 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
return
}
+ bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName)
+ if err != nil || !bootFileExist {
+ log.Error("Get bootfile error:", err)
+ inferenceJobErrorNewDataPrepare(ctx, form)
+ ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsInferenceJobNew, &form)
+ return
+ }
+
//Determine whether the task name of the task in the project is duplicated
tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeInference), displayJobName)
if err == nil {
@@ -2079,12 +2048,12 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
}
gitRepo, _ := git.OpenRepository(repo.RepoPath())
- commitID, _ := gitRepo.GetBranchCommitID(branch_name)
+ commitID, _ := gitRepo.GetBranchCommitID(branchName)
- if err := downloadCode(repo, codeLocalPath, branch_name); err != nil {
+ if err := downloadCode(repo, codeLocalPath, branchName); err != nil {
log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err)
inferenceJobErrorNewDataPrepare(ctx, form)
- ctx.RenderWithErr("Create task failed, server timed out", tplModelArtsInferenceJobNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsInferenceJobNew, &form)
return
}
@@ -2106,7 +2075,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
inferenceJobErrorNewDataPrepare(ctx, form)
- ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsInferenceJobNew, &form)
+ ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsInferenceJobNew, &form)
return
}
@@ -2165,7 +2134,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
Uuid: uuid,
Parameters: param, //modelarts train parameters
CommitID: commitID,
- BranchName: branch_name,
+ BranchName: branchName,
Params: form.Params,
FlavorName: FlavorName,
EngineName: EngineName,
@@ -2317,8 +2286,7 @@ func InferenceJobNew(ctx *context.Context) {
ctx.ServerError("get new inference-job info failed", err)
return
}
- waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
- ctx.Data["WaitCount"] = waitCount
+
ctx.HTML(200, tplModelArtsInferenceJobNew)
}
func inferenceJobNewDataPrepare(ctx *context.Context) error {
@@ -2389,6 +2357,8 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error {
})
ctx.Data["MODEL_COUNT"] = model_count
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
+ ctx.Data["WaitCount"] = waitCount
return nil
}
@@ -2462,6 +2432,8 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel
ctx.Data["ckpt_name"] = form.CkptName
ctx.Data["train_url"] = form.TrainUrl
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
+ waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
+ ctx.Data["WaitCount"] = waitCount
return nil
}
@@ -2515,7 +2487,7 @@ func InferenceJobShow(ctx *context.Context) {
ctx.Data["displayJobName"] = task.DisplayJobName
ctx.Data["task"] = task
ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task)
-
+ ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, false)
tempUids := []int64{}
tempUids = append(tempUids, task.UserID)
JobCreater, err := models.GetUserNamesByIDs(tempUids)
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index 03b53c5cd..a4bc09472 100755
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -1183,7 +1183,8 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/:id", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2)
- m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage)
+ m.Post("/restart", cloudbrain.AdminOrJobCreaterRight, repo.NotebookRestart)
+ m.Post("/stop", cloudbrain.AdminOrJobCreaterRight, repo.NotebookStop)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel)
})
m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew)
diff --git a/routers/search.go b/routers/search.go
index 628350424..8453d5c18 100644
--- a/routers/search.go
+++ b/routers/search.go
@@ -314,7 +314,7 @@ func searchRepo(ctx *context.Context, TableName string, Key string, Page int, Pa
res, err := client.Search(TableName).Query(boolQ).SortBy(getSort(SortBy, ascending, "num_stars", false)...).From(from).Size(Size).Highlight(queryHighlight("alias", "description", "topics")).Do(ctx.Req.Context())
if err == nil {
esresult := makeRepoResult(res, Key, OnlyReturnNum, language)
- setForkRepoOrder(esresult)
+ setForkRepoOrder(esresult, SortBy)
resultObj.Total = resultObj.PrivateTotal + esresult.Total
isNeedSort := false
if len(resultObj.Result) > 0 {
@@ -347,24 +347,26 @@ func searchRepo(ctx *context.Context, TableName string, Key string, Page int, Pa
}
}
-func setForkRepoOrder(esresult *SearchRes) {
- forkidMap := make(map[string]int, 0)
- for index, re := range esresult.Result {
- if re["fork_id"] != nil {
- fork_id := re["fork_id"].(string)
- if _, ok := forkidMap[fork_id]; !ok {
- forkidMap[fork_id] = index
+func setForkRepoOrder(esresult *SearchRes, SortBy string) {
+ if SortBy == "default" || SortBy == "" {
+ forkidMap := make(map[string]int, 0)
+ for index, re := range esresult.Result {
+ if re["fork_id"] != nil {
+ fork_id := re["fork_id"].(string)
+ if _, ok := forkidMap[fork_id]; !ok {
+ forkidMap[fork_id] = index
+ }
}
}
- }
- for key, value := range forkidMap {
- for index, re := range esresult.Result {
- if re["id"].(string) == key {
- if value < index { //swap
- tmp := esresult.Result[index]
- esresult.Result[index] = esresult.Result[value]
- esresult.Result[value] = tmp
- break
+ for key, value := range forkidMap {
+ for index, re := range esresult.Result {
+ if re["id"].(string) == key {
+ if value < index { //swap
+ tmp := esresult.Result[index]
+ esresult.Result[index] = esresult.Result[value]
+ esresult.Result[value] = tmp
+ break
+ }
}
}
}
diff --git a/routers/user/home.go b/routers/user/home.go
index 25b1c518e..d8c2565c6 100755
--- a/routers/user/home.go
+++ b/routers/user/home.go
@@ -761,10 +761,14 @@ func Cloudbrains(ctx *context.Context) {
listType := ctx.Query("listType")
jobType := ctx.Query("jobType")
jobStatus := ctx.Query("jobStatus")
+ aiCenter := ctx.Query("aiCenter")
+ cluster := ctx.Query("cluster")
ctx.Data["ListType"] = listType
ctx.Data["JobType"] = jobType
ctx.Data["JobStatus"] = jobStatus
+ ctx.Data["aiCenter"] = aiCenter
+ ctx.Data["cluster"] = cluster
page := ctx.QueryInt("page")
if page <= 0 {
@@ -825,6 +829,8 @@ func Cloudbrains(ctx *context.Context) {
RepoIDList: repoIDList,
ComputeResource: listType,
Type: models.TypeCloudBrainAll,
+ AiCenter: aiCenter,
+ Cluster: cluster,
})
if err != nil {
ctx.ServerError("Get job failed:", err)
diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl
index cd5913c40..2f102f10a 100755
--- a/templates/admin/cloudbrain/list.tmpl
+++ b/templates/admin/cloudbrain/list.tmpl
@@ -14,6 +14,10 @@
人工智能算力网络推进联盟已接入10家智算中心,算力总规模1542P
+人工智能算力网络推进联盟已接入11家智算中心,算力总规模1924P
{{.page_dev_yunlao_desc1}}
{{.page_dev_yunlao_desc2}}
{{.page_dev_yunlao_desc3}}
- {{.page_dev_yunlao_desc4}}
+ {{.page_dev_yunlao_desc4}}
+ {{.page_dev_yunlao_desc5}}
1111
+' + data.Content) - } - - } - }).fail(function (err) { - console.log(err); - }); - } - if ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].includes(scrollTop) && scrollLeft == 0) { - let start_line = $(`#log${version_name} input[name=start_line]`).val() - $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=${start_line}&lines=50&order=asc`, (data) => { - if (data.Lines == 0) { - $(`.message${version_name} #header`).text('您已翻阅至日志顶部') - $(`.message${version_name}`).css('display', 'block') - setTimeout(function () { - $(`.message${version_name}`).css('display', 'none') - }, 1000) - } else { - $(`#log${version_name} input[name=start_line]`).val(data.StartLine) //如果变动就改变所对应的值 - $(`#log${version_name}`).prepend('' + data.Content) - } - }).fail(function (err) { - console.log(err); - }); - } - } - function scrollAnimation(dom, currentY, targetY, currentX) { - let needScrollTop = targetY - currentY; - let _currentY = currentY; - setTimeout(() => { - // 一次调用滑动帧数,每次调用会不一样 - //取总距离的十分之一 - const dist = Math.ceil(needScrollTop / 10); - _currentY += dist; - //移动一个十分之一 - dom.scrollTo(currentX || 0, _currentY, 'smooth'); - // 如果移动幅度小于十个像素,直接移动,否则递归调用,实现动画效果 - if (needScrollTop > 10 || needScrollTop < -10) { - scrollAnimation(dom, _currentY, targetY) - } else { - dom.scrollTo(0, targetY, 'smooth') - } - }, 1) - } - - $('.log_top').click(function () { - // let logContentDom = document.querySelector('.log') - // if(!logContentDom) - // return - // let version_name = $('.log_top').data('version') - let version_name = $(this).data('version') - let logContentDom = document.querySelector(`#log${version_name}`) - - $(`#log_file${version_name}`).siblings('pre').remove() - $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=&lines=50&order=asc`, (data) => { - - $(`#log${version_name} input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值 - $(`#log${version_name} 
input[name=start_line]`).val(data.StartLine) - $(`#log${version_name}`).prepend('' + data.Content) - $(`.message${version_name} #header`).text('您已翻阅至日志顶部') - $(`.message${version_name}`).css('display', 'block') - setTimeout(function () { - $(`.message${version_name}`).css('display', 'none') - }, 1000) - scrollAnimation(logContentDom, logContentDom.scrollTop, 0); - }) - - }) - $('.log_bottom').click(function (e) { - let version_name = $(this).data('version') - let logContentDom = document.querySelector(`#log${version_name}`) - $(`#log_file${version_name}`).siblings('pre').remove() - $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=&lines=50&order=desc`, (data) => { - - $(`#log${version_name} input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值 - $(`#log${version_name} input[name=start_line]`).val(data.StartLine) - $(`#log${version_name}`).append('' + data.Content) - $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=${data.EndLine}&lines=50&order=desc`, (data) => { - if (data.Lines == 0) { - $(`.message${version_name} #header`).text('您已翻阅至日志底部') - $(`.message${version_name}`).css('display', 'block') - setTimeout(function () { - $(`.message${version_name}`).css('display', 'none') - }, 1000) - } else { - if (end_line === data.EndLine) { - return - } - else { - $(`#log${version_name} input[name=end_line]`).val(data.EndLine) - $(`#log${version_name}`).append('' + data.Content) - } - - } - }).fail(function (err) { - console.log(err); - }); - scrollAnimation(logContentDom, logContentDom.scrollTop + 1, logContentDom.scrollHeight - logContentDom.clientHeight); - }) - }) + diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index 886469d4c..6712f5e7d 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ 
-51,18 +51,7 @@ } - --+{{template "custom/global_mask" .}}- - - - - --{{template "repo/header" .}}+diff --git a/templates/repo/modelmanage/convertIndex.tmpl b/templates/repo/modelmanage/convertIndex.tmpl index 4032b28d5..ee2d56ee3 100644 --- a/templates/repo/modelmanage/convertIndex.tmpl +++ b/templates/repo/modelmanage/convertIndex.tmpl @@ -254,6 +254,10 @@+ ++
\n", f.styleAttr(css, chroma.LineTableTD)) + fmt.Fprintf(w, f.preWrapper.Start(false, f.styleAttr(css, chroma.PreWrapper))) + for index := range lines { + line := f.baseLineNumber + index + highlight, next := f.shouldHighlight(highlightIndex, line) + if next { + highlightIndex++ + } + if highlight { + fmt.Fprintf(w, "", f.styleAttr(css, chroma.LineHighlight)) + } + + fmt.Fprintf(w, "%s\n", f.styleAttr(css, chroma.LineNumbersTable), f.lineIDAttribute(line), f.lineTitleWithLinkIfNeeded(lineDigits, line)) + + if highlight { + fmt.Fprintf(w, "") + } + } + fmt.Fprint(w, f.preWrapper.End(false)) + fmt.Fprint(w, " | \n") + fmt.Fprintf(w, "\n", f.styleAttr(css, chroma.LineTableTD, "width:100%")) + } + + fmt.Fprintf(w, f.preWrapper.Start(true, f.styleAttr(css, chroma.PreWrapper))) + + highlightIndex = 0 + for index, tokens := range lines { + // 1-based line number. + line := f.baseLineNumber + index + highlight, next := f.shouldHighlight(highlightIndex, line) + if next { + highlightIndex++ + } + + // Start of Line + fmt.Fprint(w, ``) + } else { + fmt.Fprintf(w, "%s>", f.styleAttr(css, chroma.Line)) + } + + // Line number + if f.lineNumbers && !wrapInTable { + fmt.Fprintf(w, "%s", f.styleAttr(css, chroma.LineNumbers), f.lineIDAttribute(line), f.lineTitleWithLinkIfNeeded(lineDigits, line)) + } + + fmt.Fprintf(w, ``, f.styleAttr(css, chroma.CodeLine)) + + for _, token := range tokens { + html := html.EscapeString(token.String()) + attr := f.styleAttr(css, token.Type) + if attr != "" { + html = fmt.Sprintf("%s", attr, html) + } + fmt.Fprint(w, html) + } + + fmt.Fprint(w, ``) // End of CodeLine + + fmt.Fprint(w, ``) // End of Line + } + + fmt.Fprintf(w, f.preWrapper.End(true)) + + if wrapInTable { + fmt.Fprint(w, " |