Browse Source

Merge pull request 'V20220908合并' (#2875) from V20220908 into develop

Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/2875
tags/v1.22.9.1
ychao_1983 2 years ago
parent
commit
f8ba4a25fd
64 changed files with 3254 additions and 876 deletions
  1. +101
    -38
      models/cloudbrain.go
  2. +109
    -0
      models/cloudbrain_spec.go
  3. +3
    -7
      models/dataset.go
  4. +1
    -0
      models/models.go
  5. +14
    -0
      models/repo_collaboration.go
  6. +2
    -0
      models/resource_queue.go
  7. +281
    -5
      models/resource_specification.go
  8. +2
    -0
      modules/auth/cloudbrain.go
  9. +1
    -2
      modules/auth/grampus.go
  10. +3
    -0
      modules/auth/modelarts.go
  11. +1
    -1
      modules/auth/wechat/cloudbrain.go
  12. +18
    -76
      modules/cloudbrain/cloudbrain.go
  13. +9
    -11
      modules/grampus/grampus.go
  14. +44
    -23
      modules/modelarts/modelarts.go
  15. +215
    -0
      modules/modelarts_cd/modelarts.go
  16. +220
    -0
      modules/modelarts_cd/resty.go
  17. +42
    -0
      modules/modelarts_gateway/core/escape.go
  18. +208
    -0
      modules/modelarts_gateway/core/signer.go
  19. +76
    -8
      modules/setting/setting.go
  20. +5
    -0
      options/locale/locale_en-US.ini
  21. +5
    -0
      options/locale/locale_zh-CN.ini
  22. +2
    -2
      routers/admin/cloudbrains.go
  23. +42
    -4
      routers/admin/resources.go
  24. +139
    -30
      routers/api/v1/repo/cloudbrain.go
  25. +2
    -0
      routers/private/internal.go
  26. +187
    -273
      routers/repo/cloudbrain.go
  27. +2
    -7
      routers/repo/dataset.go
  28. +42
    -15
      routers/repo/grampus.go
  29. +241
    -151
      routers/repo/modelarts.go
  30. +1
    -0
      routers/response/response_list.go
  31. +1
    -3
      routers/user/home.go
  32. +470
    -1
      services/cloudbrain/resource/resource_specification.go
  33. +12
    -4
      templates/admin/cloudbrain/list.tmpl
  34. +1
    -1
      templates/admin/resources/queue.tmpl
  35. +1
    -1
      templates/admin/resources/scene.tmpl
  36. +1
    -1
      templates/admin/resources/specification.tmpl
  37. +25
    -0
      templates/custom/task_wait_count.tmpl
  38. +49
    -9
      templates/repo/cloudbrain/benchmark/new.tmpl
  39. +59
    -19
      templates/repo/cloudbrain/benchmark/show.tmpl
  40. +37
    -11
      templates/repo/cloudbrain/inference/new.tmpl
  41. +58
    -6
      templates/repo/cloudbrain/inference/show.tmpl
  42. +42
    -11
      templates/repo/cloudbrain/new.tmpl
  43. +17
    -6
      templates/repo/cloudbrain/show.tmpl
  44. +44
    -14
      templates/repo/cloudbrain/trainjob/new.tmpl
  45. +16
    -2
      templates/repo/cloudbrain/trainjob/show.tmpl
  46. +45
    -17
      templates/repo/grampus/trainjob/gpu/new.tmpl
  47. +38
    -10
      templates/repo/grampus/trainjob/npu/new.tmpl
  48. +14
    -2
      templates/repo/grampus/trainjob/show.tmpl
  49. +35
    -8
      templates/repo/modelarts/inferencejob/new.tmpl
  50. +15
    -2
      templates/repo/modelarts/inferencejob/show.tmpl
  51. +33
    -6
      templates/repo/modelarts/notebook/new.tmpl
  52. +23
    -8
      templates/repo/modelarts/notebook/show.tmpl
  53. +52
    -10
      templates/repo/modelarts/trainjob/new.tmpl
  54. +18
    -1
      templates/repo/modelarts/trainjob/show.tmpl
  55. +30
    -53
      templates/repo/modelarts/trainjob/version_new.tmpl
  56. +12
    -5
      templates/user/dashboard/cloudbrains.tmpl
  57. +31
    -0
      web_src/js/standalone/specsuse.js
  58. +1
    -1
      web_src/vuepages/const/index.js
  59. +8
    -2
      web_src/vuepages/langs/config/en-US.js
  60. +7
    -1
      web_src/vuepages/langs/config/zh-CN.js
  61. +3
    -2
      web_src/vuepages/pages/resources/components/QueueDialog.vue
  62. +1
    -1
      web_src/vuepages/pages/resources/components/SceneDialog.vue
  63. +7
    -2
      web_src/vuepages/pages/resources/components/SpecificationDialog.vue
  64. +30
    -3
      web_src/vuepages/pages/resources/specification/index.vue

+ 101
- 38
models/cloudbrain.go View File

@@ -25,7 +25,8 @@ type ModelArtsJobStatus string
const (
TypeCloudBrainOne int = iota
TypeCloudBrainTwo
TypeC2Net //智算网络
TypeC2Net //智算网络
TypeCDCenter //成都智算中心

TypeCloudBrainAll = -1
)
@@ -120,6 +121,11 @@ const (
//AI center
AICenterOfCloudBrainOne = "OpenIOne"
AICenterOfCloudBrainTwo = "OpenITwo"
AICenterOfChengdu = "OpenIChengdu"

//ComputeResource
GPU = "GPU"
NPU = "NPU"
)

type Cloudbrain struct {
@@ -190,6 +196,7 @@ type Cloudbrain struct {
BenchmarkTypeRankLink string `xorm:"-"`
StartTime timeutil.TimeStamp
EndTime timeutil.TimeStamp
Spec *Specification `xorm:"-"`
}

func (task *Cloudbrain) ComputeAndSetDuration() {
@@ -596,37 +603,17 @@ type ResourceSpec struct {
ShareMemMiB int `json:"shareMemMiB"`
}

type FlavorInfos struct {
FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

type FlavorInfo struct {
Id int `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

type SpecialPools struct {
Pools []*SpecialPool `json:"pools"`
}
type SpecialPool struct {
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
Flavor []*FlavorInfo `json:"flavor"`
}

type ImageInfosModelArts struct {
ImageInfo []*ImageInfoModelArts `json:"image_info"`
}

type ImageInfoModelArts struct {
Id string `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
Flavor []*setting.FlavorInfo `json:"flavor"`
}

type PoolInfos struct {
@@ -732,6 +719,17 @@ type CreateNotebook2Params struct {
Volume VolumeReq `json:"volume"`
}

// CreateNotebookWithoutPoolParams is a JSON payload describing a notebook to
// create. As its name indicates, it carries no resource-pool field; every
// field is serialized under the key given in its json tag.
type CreateNotebookWithoutPoolParams struct {
// JobName is sent as "name".
JobName string `json:"name"`
Description string `json:"description"`
// Duration is expressed in milliseconds.
Duration int64 `json:"duration"` //ms
Feature string `json:"feature"`
Flavor string `json:"flavor"`
ImageID string `json:"image_id"`
WorkspaceID string `json:"workspace_id"`
Volume VolumeReq `json:"volume"`
}

type VolumeReq struct {
Capacity int `json:"capacity"`
Category string `json:"category"`
@@ -955,6 +953,7 @@ type NotebookGetJobTokenResult struct {
}

type NotebookDelResult struct {
NotebookResult
InstanceID string `json:"instance_id"`
}

@@ -1481,12 +1480,6 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
)
}

if len(opts.ComputeResource) > 0 {
cond = cond.And(
builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource},
)
}

if len(opts.JobTypes) > 0 {
if opts.JobTypeNot {
cond = cond.And(
@@ -1506,7 +1499,7 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
if (opts.Cluster) != "" {
if opts.Cluster == "resource_cluster_openi" {
cond = cond.And(
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}),
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}, builder.Eq{"cloudbrain.type": TypeCDCenter}),
)
}
if opts.Cluster == "resource_cluster_c2net" {
@@ -1720,11 +1713,24 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e
}

func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
session := x.NewSession()
defer session.Close()

err = session.Begin()
cloudbrain.TrainJobDuration = DURATION_STR_ZERO
if _, err = x.NoAutoTime().Insert(cloudbrain); err != nil {
if _, err = session.NoAutoTime().InsertOne(cloudbrain); err != nil {
session.Rollback()
return err
}

if cloudbrain.Spec != nil {
if _, err = session.Insert(NewCloudBrainSpec(cloudbrain.ID, *cloudbrain.Spec)); err != nil {
session.Rollback()
return err
}
}
session.Commit()

go IncreaseDatasetUseCount(cloudbrain.Uuid)
return nil
}
@@ -1959,7 +1965,7 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy

func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) {
count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting).
And("job_type = ? and user_id = ? and type = ?", JobTypeDebug, userID, TypeCloudBrainTwo).Count(new(Cloudbrain))
And("job_type = ? and user_id = ? and type in (?,?)", JobTypeDebug, userID, TypeCloudBrainTwo, TypeCDCenter).Count(new(Cloudbrain))
return int(count), err
}

@@ -2003,11 +2009,18 @@ func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) {
return err
}

if _, err = sess.NoAutoTime().Insert(new); err != nil {
if _, err = sess.NoAutoTime().InsertOne(new); err != nil {
sess.Rollback()
return err
}

if new.Spec != nil {
if _, err = sess.Insert(NewCloudBrainSpec(new.ID, *new.Spec)); err != nil {
sess.Rollback()
return err
}
}

if err = sess.Commit(); err != nil {
return err
}
@@ -2399,7 +2412,57 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) {
Find(&cloudbrains)
}

// GetCloudbrainWithDeletedByIDs loads the cloudbrain tasks whose id is in ids.
// The query is issued with Unscoped, so (per the function name) rows that were
// soft-deleted are expected to be returned as well.
//
// The query is executed before the result slice is read for the return
// statement: Go does not specify the evaluation order between a plain variable
// read and a function call inside the same return list, so the two steps are
// kept as separate statements.
func GetCloudbrainWithDeletedByIDs(ids []int64) ([]*Cloudbrain, error) {
	cloudbrains := make([]*Cloudbrain, 0)
	err := x.In("id", ids).Unscoped().Find(&cloudbrains)
	return cloudbrains, err
}

// GetCloudbrainCountByJobName counts the cloudbrain rows that match the given
// job name, job type and cloudbrain type. The count is narrowed to int.
func GetCloudbrainCountByJobName(jobName, jobType string, typeCloudbrain int) (int, error) {
	query := x.Where("job_name = ? and job_type= ? and type = ?", jobName, jobType, typeCloudbrain)
	total, err := query.Count(new(Cloudbrain))
	return int(total), err
}

// LoadSpecs attaches the stored resource specification to each task.
//
// It fetches the cloudbrain_spec rows for all task ids with a single query and
// sets Spec on every task that has a stored row; tasks without one are left
// untouched. It returns the query error, if any.
func LoadSpecs(tasks []*Cloudbrain) error {
	cloudbrainIds := make([]int64, len(tasks))
	for i, v := range tasks {
		cloudbrainIds[i] = v.ID
	}
	specs := make([]*CloudbrainSpec, 0)
	err := x.In("cloudbrain_id", cloudbrainIds).Find(&specs)
	if err != nil {
		return err
	}
	// Index by the owning task id: the lookup below is done with the task id,
	// so keying by SpecId would attach specs to the wrong task (or to none).
	specMap := make(map[int64]*CloudbrainSpec, len(specs))
	for _, v := range specs {
		specMap[v.CloudbrainID] = v
	}
	for _, v := range tasks {
		if spec := specMap[v.ID]; spec != nil {
			v.Spec = spec.ConvertToSpecification()
		}
	}
	return nil
}

// LoadSpecs4CloudbrainInfo fills in Cloudbrain.Spec for every entry of tasks
// that has a row in cloudbrain_spec. All rows are fetched with one query and
// matched to the entries by cloudbrain id; entries without a row are skipped.
func LoadSpecs4CloudbrainInfo(tasks []*CloudbrainInfo) error {
	ids := make([]int64, len(tasks))
	for idx := range tasks {
		ids[idx] = tasks[idx].Cloudbrain.ID
	}

	stored := make([]*CloudbrainSpec, 0)
	if err := x.In("cloudbrain_id", ids).Find(&stored); err != nil {
		return err
	}

	byTask := make(map[int64]*CloudbrainSpec)
	for _, row := range stored {
		byTask[row.CloudbrainID] = row
	}

	for _, info := range tasks {
		row := byTask[info.Cloudbrain.ID]
		if row == nil {
			continue
		}
		info.Cloudbrain.Spec = row.ConvertToSpecification()
	}
	return nil
}

+ 109
- 0
models/cloudbrain_spec.go View File

@@ -0,0 +1,109 @@
package models

import (
"code.gitea.io/gitea/modules/timeutil"
)

// CloudbrainSpec is the persisted copy of the resource specification that
// belongs to one cloudbrain task. It mirrors the fields of Specification
// (see NewCloudBrainSpec / ConvertToSpecification) and adds the owning task id
// plus created/updated timestamps.
type CloudbrainSpec struct {
// CloudbrainID is the id of the owning task and the primary key of the row.
CloudbrainID int64 `xorm:"pk"`
// SpecId holds Specification.ID; the column is indexed.
SpecId int64 `xorm:"index"`
SourceSpecId string
AccCardsNum int
AccCardType string
CpuCores int
MemGiB float32
GPUMemGiB float32
ShareMemGiB float32
ComputeResource string
UnitPrice int
QueueId int64
QueueCode string
Cluster string
AiCenterCode string
AiCenterName string
IsExclusive bool
ExclusiveOrg string
// CreatedTime and UpdatedTime carry xorm's "created"/"updated" tags.
CreatedTime timeutil.TimeStamp `xorm:"created"`
UpdatedTime timeutil.TimeStamp `xorm:"updated"`
}

// ConvertToSpecification returns a newly allocated Specification populated
// from the stored row. SpecId becomes the Specification's ID; the owning task
// id and the timestamps are not carried over.
func (s CloudbrainSpec) ConvertToSpecification() *Specification {
	out := new(Specification)
	out.ID = s.SpecId
	out.SourceSpecId = s.SourceSpecId
	out.AccCardsNum = s.AccCardsNum
	out.AccCardType = s.AccCardType
	out.CpuCores = s.CpuCores
	out.MemGiB = s.MemGiB
	out.GPUMemGiB = s.GPUMemGiB
	out.ShareMemGiB = s.ShareMemGiB
	out.ComputeResource = s.ComputeResource
	out.UnitPrice = s.UnitPrice
	out.QueueId = s.QueueId
	out.QueueCode = s.QueueCode
	out.Cluster = s.Cluster
	out.AiCenterCode = s.AiCenterCode
	out.AiCenterName = s.AiCenterName
	out.IsExclusive = s.IsExclusive
	out.ExclusiveOrg = s.ExclusiveOrg
	return out
}

// NewCloudBrainSpec builds the row to persist for the task identified by
// cloudbrainId, copying every field of s. The Specification's ID is stored in
// SpecId; the timestamp fields are left at their zero value.
func NewCloudBrainSpec(cloudbrainId int64, s Specification) CloudbrainSpec {
	var row CloudbrainSpec
	row.CloudbrainID = cloudbrainId
	row.SpecId = s.ID
	row.SourceSpecId = s.SourceSpecId
	row.AccCardsNum = s.AccCardsNum
	row.AccCardType = s.AccCardType
	row.CpuCores = s.CpuCores
	row.MemGiB = s.MemGiB
	row.GPUMemGiB = s.GPUMemGiB
	row.ShareMemGiB = s.ShareMemGiB
	row.ComputeResource = s.ComputeResource
	row.UnitPrice = s.UnitPrice
	row.QueueId = s.QueueId
	row.QueueCode = s.QueueCode
	row.Cluster = s.Cluster
	row.AiCenterCode = s.AiCenterCode
	row.AiCenterName = s.AiCenterName
	row.IsExclusive = s.IsExclusive
	row.ExclusiveOrg = s.ExclusiveOrg
	return row
}

// InsertCloudbrainSpec inserts c through the package engine and returns what
// the engine's Insert call returns.
func InsertCloudbrainSpec(c CloudbrainSpec) (int64, error) {
	row := &c
	return x.Insert(row)
}

// GetCloudbrainSpecByID looks up the stored spec of the task with the given
// cloudbrain id. It returns (nil, nil) when no row exists and (nil, err) when
// the query fails.
func GetCloudbrainSpecByID(cloudbrainId int64) (*CloudbrainSpec, error) {
	spec := new(CloudbrainSpec)
	found, err := x.Where("cloudbrain_id = ?", cloudbrainId).Get(spec)
	if err != nil {
		return nil, err
	}
	if !found {
		return nil, nil
	}
	return spec, nil
}

// FindCloudbrainTask returns one page of cloudbrain tasks ordered by id. The
// query is unscoped; page is 1-based and the offset is (page-1)*pageSize.
func FindCloudbrainTask(page, pageSize int) ([]*Cloudbrain, error) {
	offset := (page - 1) * pageSize
	tasks := make([]*Cloudbrain, 0)
	query := x.Unscoped().Limit(pageSize, offset).OrderBy("cloudbrain.id")
	if err := query.Find(&tasks); err != nil {
		return nil, err
	}
	return tasks, nil
}

// CountNoSpecHistoricTask counts the cloudbrain tasks (unscoped query) that
// have no matching row in cloudbrain_spec.
func CountNoSpecHistoricTask() (int64, error) {
	const withoutSpec = " 1=1 and not exists (select 1 from cloudbrain_spec where cloudbrain.id = cloudbrain_spec.cloudbrain_id)"
	total, err := x.Unscoped().Where(withoutSpec).Count(new(Cloudbrain))
	if err != nil {
		return 0, err
	}
	return total, nil
}

+ 3
- 7
models/dataset.go View File

@@ -130,15 +130,11 @@ func (datasets DatasetList) loadAttachmentAttributes(opts *SearchDatasetOptions)

permission = false
datasets[i].Repo.GetOwner()
if datasets[i].Repo.Owner.IsOrganization() {
if datasets[i].Repo.Owner.IsUserPartOfOrg(opts.User.ID) {
log.Info("user is member of org.")
permission = true
}
}
if !permission {
isCollaborator, _ := datasets[i].Repo.IsCollaborator(opts.User.ID)
if isCollaborator ||datasets[i].Repo.IsOwnedBy(opts.User.ID){
isInRepoTeam,_:=datasets[i].Repo.IsInRepoTeam(opts.User.ID)

if isCollaborator ||isInRepoTeam {
log.Info("Collaborator user may visit the attach.")
permission = true
}


+ 1
- 0
models/models.go View File

@@ -150,6 +150,7 @@ func init() {
new(ResourceScene),
new(ResourceSceneSpec),
new(AdminOperateLog),
new(CloudbrainSpec),
new(CloudbrainTemp),
new(DatasetReference),
)


+ 14
- 0
models/repo_collaboration.go View File

@@ -130,6 +130,20 @@ func (repo *Repository) IsCollaborator(userID int64) (bool, error) {
return repo.isCollaborator(x, userID)
}

// IsInRepoTeam reports whether the user is a member of at least one of the
// teams returned by repo.GetRepoTeams. A failure to load the teams yields
// false together with that error; a repository without teams yields false.
func (repo *Repository) IsInRepoTeam(userID int64) (bool, error) {
	repoTeams, err := repo.GetRepoTeams()
	if err != nil {
		return false, err
	}
	// An empty team list simply falls through the loop.
	for i := range repoTeams {
		if repoTeams[i].IsMember(userID) {
			return true, nil
		}
	}
	return false, nil
}

func (repo *Repository) changeCollaborationAccessMode(e Engine, uid int64, mode AccessMode) error {
// Discard invalid input
if mode <= AccessModeNone || mode > AccessModeOwner {


+ 2
- 0
models/resource_queue.go View File

@@ -71,6 +71,8 @@ func (r ResourceQueueReq) ToDTO() ResourceQueue {
q.AiCenterName = "云脑一"
} else if r.AiCenterCode == AICenterOfCloudBrainTwo {
q.AiCenterName = "云脑二"
} else if r.AiCenterCode == AICenterOfChengdu {
q.AiCenterName = "启智成都智算"
}
}
return q


+ 281
- 5
models/resource_specification.go View File

@@ -2,6 +2,7 @@ package models

import (
"code.gitea.io/gitea/modules/timeutil"
"fmt"
"xorm.io/builder"
)

@@ -22,6 +23,7 @@ type ResourceSpecification struct {
ShareMemGiB float32
UnitPrice int
Status int
IsAvailable bool
IsAutomaticSync bool
CreatedTime timeutil.TimeStamp `xorm:"created"`
CreatedBy int64
@@ -40,6 +42,7 @@ func (r ResourceSpecification) ConvertToRes() *ResourceSpecificationRes {
GPUMemGiB: r.GPUMemGiB,
UnitPrice: r.UnitPrice,
Status: r.Status,
IsAvailable: r.IsAvailable,
UpdatedTime: r.UpdatedTime,
}
}
@@ -72,14 +75,16 @@ func (r ResourceSpecificationReq) ToDTO() ResourceSpecification {
IsAutomaticSync: r.IsAutomaticSync,
CreatedBy: r.CreatorId,
UpdatedBy: r.CreatorId,
IsAvailable: true,
}
}

type SearchResourceSpecificationOptions struct {
ListOptions
QueueId int64
Status int
Cluster string
QueueId int64
Status int
Cluster string
AvailableCode int
}

type SearchResourceBriefSpecificationOptions struct {
@@ -113,6 +118,7 @@ type ResourceSpecificationRes struct {
ShareMemGiB float32
UnitPrice int
Status int
IsAvailable bool
UpdatedTime timeutil.TimeStamp
}

@@ -141,6 +147,53 @@ func (r ResourceSpecAndQueue) ConvertToRes() *ResourceSpecAndQueueRes {
}
}

// FindSpecsOptions holds the optional filters accepted by FindSpecs.
// String filters and SpecId are applied only when non-empty / greater than
// zero. Numeric filters come in pairs: the value (e.g. AccCardsNum) is applied
// only when its Use* flag (e.g. UseAccCardsNum) is true, which allows matching
// on a zero value.
type FindSpecsOptions struct {
// JobType is only applied when RequestAll is false.
JobType JobType
ComputeResource string
Cluster string
AiCenterCode string
SpecId int64
QueueCode string
SourceSpecId string
AccCardsNum int
UseAccCardsNum bool
AccCardType string
CpuCores int
UseCpuCores bool
MemGiB float32
UseMemGiB bool
GPUMemGiB float32
UseGPUMemGiB bool
ShareMemGiB float32
UseShareMemGiB bool
// If true, find specs whether or not they are used in a scene.
// If false, only find specs that are used in a scene.
RequestAll bool
}

// Specification is a flat description of a resource specification together
// with the queue, cluster and AI-center attributes that go with it.
type Specification struct {
	// Identity of the specification.
	ID           int64
	SourceSpecId string

	// Hardware figures.
	AccCardsNum int
	AccCardType string
	CpuCores    int
	MemGiB      float32
	GPUMemGiB   float32
	ShareMemGiB float32

	ComputeResource string
	UnitPrice       int

	// Queue / location attributes.
	QueueId      int64
	QueueCode    string
	Cluster      string
	AiCenterCode string
	AiCenterName string

	// Exclusivity attributes.
	IsExclusive  bool
	ExclusiveOrg string
}

// TableName returns the name of the table this type is read from.
func (Specification) TableName() string {
	const table = "resource_specification"
	return table
}

func InsertResourceSpecification(r ResourceSpecification) (int64, error) {
return x.Insert(&r)
}
@@ -167,6 +220,11 @@ func SearchResourceSpecification(opts SearchResourceSpecificationOptions) (int64
if opts.Cluster != "" {
cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster})
}
if opts.AvailableCode == 1 {
cond = cond.And(builder.Eq{"resource_specification.is_available": true})
} else if opts.AvailableCode == 2 {
cond = cond.And(builder.Eq{"resource_specification.is_available": false})
}
//cond = cond.And(builder.Or(builder.Eq{"resource_queue.deleted_time": 0}).Or(builder.IsNull{"resource_queue.deleted_time"}))
n, err := x.Where(cond).Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id").
Unscoped().Count(&ResourceSpecAndQueue{})
@@ -256,7 +314,7 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS
return err
}
if len(deleteIds) > 0 {
if _, err = sess.In("id", deleteIds).Update(&ResourceSpecification{Status: SpecOffShelf}); err != nil {
if _, err = sess.Cols("status", "is_available").In("id", deleteIds).Update(&ResourceSpecification{Status: SpecOffShelf, IsAvailable: false}); err != nil {
return err
}
if _, err = sess.In("spec_id", deleteIds).Delete(&ResourceSceneSpec{}); err != nil {
@@ -267,7 +325,7 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS
//update exists specs
if len(updateList) > 0 {
for _, v := range updateList {
if _, err = sess.ID(v.ID).Update(&v); err != nil {
if _, err = sess.ID(v.ID).UseBool("is_available").Update(&v); err != nil {
return err
}
}
@@ -283,3 +341,221 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS

return sess.Commit()
}

// FindSpecs returns the specifications matching opts.
//
// resource_specification is always inner-joined with resource_queue. Unless
// opts.RequestAll is set, it is additionally joined with resource_scene_spec
// and resource_scene, so only specs attached to a scene are returned and the
// JobType filter becomes available. Results are ordered by compute resource,
// card type, card count, CPU cores, memory and shared memory (all ascending).
func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) {
	filter := builder.NewCond()
	// eq appends "column = value" to the filter.
	eq := func(column string, value interface{}) {
		filter = filter.And(builder.Eq{column: value})
	}

	sceneOnly := !opts.RequestAll
	if sceneOnly && opts.JobType != "" {
		eq("resource_scene.job_type", opts.JobType)
	}
	if opts.ComputeResource != "" {
		eq("resource_queue.compute_resource", opts.ComputeResource)
	}
	if opts.Cluster != "" {
		eq("resource_queue.cluster", opts.Cluster)
	}
	if opts.AiCenterCode != "" {
		eq("resource_queue.ai_center_code", opts.AiCenterCode)
	}
	if opts.SpecId > 0 {
		eq("resource_specification.id", opts.SpecId)
	}
	if opts.QueueCode != "" {
		eq("resource_queue.queue_code", opts.QueueCode)
	}
	if opts.SourceSpecId != "" {
		eq("resource_specification.source_spec_id", opts.SourceSpecId)
	}
	if opts.UseAccCardsNum {
		eq("resource_specification.acc_cards_num", opts.AccCardsNum)
	}
	if opts.AccCardType != "" {
		eq("resource_queue.acc_card_type", opts.AccCardType)
	}
	if opts.UseCpuCores {
		eq("resource_specification.cpu_cores", opts.CpuCores)
	}
	if opts.UseMemGiB {
		eq("resource_specification.mem_gi_b", opts.MemGiB)
	}
	if opts.UseGPUMemGiB {
		eq("resource_specification.gpu_mem_gi_b", opts.GPUMemGiB)
	}
	if opts.UseShareMemGiB {
		eq("resource_specification.share_mem_gi_b", opts.ShareMemGiB)
	}

	query := x.Where(filter).
		Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id")
	if sceneOnly {
		query = query.
			Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id").
			Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id")
	}

	const ordering = "resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc,resource_specification.cpu_cores asc,resource_specification.mem_gi_b asc,resource_specification.share_mem_gi_b asc"
	found := make([]*Specification, 0)
	if err := query.OrderBy(ordering).Unscoped().Find(&found); err != nil {
		return nil, err
	}
	return found, nil
}

// InitQueueAndSpec stores spec, creating its queue first when necessary, and
// returns the resulting Specification.
//
// Inside one transaction it looks for an existing queue with the same queue
// code, cluster, AI-center code, compute resource and card type. If none is
// found, queue is inserted; otherwise the existing row replaces queue. spec is
// then inserted with QueueId pointing at that queue.
//
// Any failure - including a failure to begin or to commit the transaction -
// is returned to the caller with a nil Specification.
func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specification, error) {
	sess := x.NewSession()
	defer sess.Close()

	if err := sess.Begin(); err != nil {
		return nil, err
	}
	param := ResourceQueue{
		QueueCode:       queue.QueueCode,
		Cluster:         queue.Cluster,
		AiCenterCode:    queue.AiCenterCode,
		ComputeResource: queue.ComputeResource,
		AccCardType:     queue.AccCardType,
	}
	_, err := sess.Get(&param)
	if err != nil {
		sess.Rollback()
		return nil, err
	}
	if param.ID == 0 {
		// No matching queue yet: create it.
		_, err = sess.InsertOne(&queue)
		if err != nil {
			sess.Rollback()
			return nil, err
		}
	} else {
		queue = param
	}

	spec.QueueId = queue.ID
	_, err = sess.InsertOne(&spec)
	if err != nil {
		sess.Rollback()
		return nil, err
	}
	// A failed commit means nothing was persisted, so it must not be reported
	// as success.
	if err = sess.Commit(); err != nil {
		return nil, err
	}
	return BuildSpecification(queue, spec), nil
}

// BuildSpecification merges a queue and one of its specifications into a new
// Specification. Card type, compute resource, queue id/code, cluster and
// AI-center fields come from queue; everything else comes from spec.
// IsExclusive and ExclusiveOrg are left at their zero value.
func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification {
	merged := new(Specification)

	// Fields taken from the specification.
	merged.ID = spec.ID
	merged.SourceSpecId = spec.SourceSpecId
	merged.AccCardsNum = spec.AccCardsNum
	merged.CpuCores = spec.CpuCores
	merged.MemGiB = spec.MemGiB
	merged.GPUMemGiB = spec.GPUMemGiB
	merged.ShareMemGiB = spec.ShareMemGiB
	merged.UnitPrice = spec.UnitPrice

	// Fields taken from the queue.
	merged.AccCardType = queue.AccCardType
	merged.ComputeResource = queue.ComputeResource
	merged.QueueId = queue.ID
	merged.QueueCode = queue.QueueCode
	merged.Cluster = queue.Cluster
	merged.AiCenterCode = queue.AiCenterCode
	merged.AiCenterName = queue.AiCenterName

	return merged
}

// GetCloudbrainOneAccCardType maps a queue code to its accelerator card type:
// "a100" -> "A100", "openidebug" -> "T4", "openidgx" -> "V100". Any other
// queue code yields the empty string. Matching is case-sensitive.
func GetCloudbrainOneAccCardType(queueCode string) string {
	if queueCode == "a100" {
		return "A100"
	}
	if queueCode == "openidebug" {
		return "T4"
	}
	if queueCode == "openidgx" {
		return "V100"
	}
	return ""
}

// Package-level cache used by GetCloudbrainTwoSpecs. No locking is performed
// around these variables.
var (
	cloudbrainTwoSpecsInitFlag = false
	cloudbrainTwoSpecs         map[string]*Specification
)

// GetCloudbrainTwoSpecs returns the map produced by InitCloudbrainTwoSpecs,
// computing it on the first call and serving the cached map afterwards. A
// failed initialization is returned as an error and is not cached, so the next
// call tries again.
func GetCloudbrainTwoSpecs() (map[string]*Specification, error) {
	if cloudbrainTwoSpecsInitFlag {
		return cloudbrainTwoSpecs, nil
	}
	specs, err := InitCloudbrainTwoSpecs()
	if err != nil {
		return nil, err
	}
	cloudbrainTwoSpecsInitFlag = true
	cloudbrainTwoSpecs = specs
	return cloudbrainTwoSpecs, nil
}

// InitCloudbrainTwoSpecs makes sure the "openisupport" queue and its four
// specifications ("modelarts.bm.910.arm.public.N" for N = 1, 2, 4, 8) exist,
// inserting whatever is missing, and returns them keyed by source spec id.
//
// A spec created here gets N cards, N*24 CPU cores, N*256 GiB of memory and
// 32 GiB of GPU memory, and is marked off-shelf but available.
func InitCloudbrainTwoSpecs() (map[string]*Specification, error) {
	const supportQueueCode = "openisupport"
	bySourceID := make(map[string]*Specification, 0)

	queue, err := GetResourceQueue(&ResourceQueue{QueueCode: supportQueueCode})
	if err != nil {
		return nil, err
	}
	if queue == nil {
		// The queue does not exist yet: create it.
		queue = &ResourceQueue{
			QueueCode:       supportQueueCode,
			Cluster:         OpenICluster,
			AiCenterCode:    AICenterOfCloudBrainTwo,
			AiCenterName:    "云脑二",
			ComputeResource: NPU,
			AccCardType:     "ASCEND910",
			Remark:          "处理历史云脑任务时自动生成",
		}
		if _, err = x.InsertOne(queue); err != nil {
			return nil, err
		}
	}

	// Card counts double each round: 1, 2, 4, 8.
	for cards := 1; cards <= 8; cards *= 2 {
		sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(cards)
		lookup := &ResourceSpecification{
			SourceSpecId: sourceSpecId,
			QueueId:      queue.ID,
		}
		spec, err := GetResourceSpecification(lookup)
		if err != nil {
			return nil, err
		}
		if spec == nil {
			spec = &ResourceSpecification{
				QueueId:      queue.ID,
				SourceSpecId: sourceSpecId,
				AccCardsNum:  cards,
				CpuCores:     cards * 24,
				MemGiB:       float32(cards * 256),
				GPUMemGiB:    float32(32),
				Status:       SpecOffShelf,
				IsAvailable:  true,
			}
			if _, err = x.Insert(spec); err != nil {
				return nil, err
			}
		}
		bySourceID[sourceSpecId] = BuildSpecification(*queue, *spec)
	}
	return bySourceID, nil
}

// Package-level cache used by GetGrampusSpecs. No locking is performed around
// these variables.
var (
	grampusSpecsInitFlag = false
	grampusSpecs         map[string]*Specification
)

// GetGrampusSpecs returns a lookup table of all C2Net-cluster specifications
// (RequestAll, i.e. regardless of scene usage). Each spec is registered under
// two keys: its SourceSpecId, and SourceSpecId + "_" + AiCenterCode. The table
// is built on the first successful call and cached; a query failure is
// returned and not cached.
func GetGrampusSpecs() (map[string]*Specification, error) {
	if grampusSpecsInitFlag {
		return grampusSpecs, nil
	}

	found, err := FindSpecs(FindSpecsOptions{
		Cluster:    C2NetCluster,
		RequestAll: true,
	})
	if err != nil {
		return nil, err
	}

	index := make(map[string]*Specification, 0)
	for i := range found {
		item := found[i]
		index[item.SourceSpecId] = item
		index[item.SourceSpecId+"_"+item.AiCenterCode] = item
	}
	grampusSpecsInitFlag = true
	grampusSpecs = index
	return grampusSpecs, nil
}

+ 2
- 0
modules/auth/cloudbrain.go View File

@@ -24,6 +24,7 @@ type CreateCloudBrainForm struct {
Params string `form:"run_para_list"`
BranchName string `form:"branch_name"`
DatasetName string `form:"dataset_name"`
SpecId int64 `form:"spec_id"`
}

type CommitImageCloudBrainForm struct {
@@ -72,6 +73,7 @@ type CreateCloudBrainInferencForm struct {
CkptName string `form:"ckpt_name" binding:"Required"`
LabelName string `form:"label_names" binding:"Required"`
DatasetName string `form:"dataset_name"`
SpecId int64 `form:"spec_id"`
}

func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {


+ 1
- 2
modules/auth/grampus.go View File

@@ -11,15 +11,14 @@ type CreateGrampusTrainJobForm struct {
Attachment string `form:"attachment" binding:"Required"`
BootFile string `form:"boot_file" binding:"Required"`
ImageID string `form:"image_id" binding:"Required"`
FlavorID string `form:"flavor" binding:"Required"`
Params string `form:"run_para_list" binding:"Required"`
Description string `form:"description"`
BranchName string `form:"branch_name" binding:"Required"`
FlavorName string `form:"flavor_name" binding:"Required"`
EngineName string `form:"engine_name" binding:"Required"`
WorkServerNumber int `form:"work_server_number" binding:"Required"`
Image string `form:"image"`
DatasetName string `form:"dataset_name"`
SpecId int64 `form:"spec_id"`
}

func (f *CreateGrampusTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {


+ 3
- 0
modules/auth/modelarts.go View File

@@ -22,6 +22,7 @@ type CreateModelArtsNotebookForm struct {
Description string `form:"description"`
Flavor string `form:"flavor" binding:"Required"`
ImageId string `form:"image_id" binding:"Required"`
SpecId int64 `form:"spec_id" binding:"Required"`
}

func (f *CreateModelArtsNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
@@ -46,6 +47,7 @@ type CreateModelArtsTrainJobForm struct {
VersionName string `form:"version_name" binding:"Required"`
FlavorName string `form:"flaver_names" binding:"Required"`
EngineName string `form:"engine_names" binding:"Required"`
SpecId int64 `form:"spec_id" binding:"Required"`
}

type CreateModelArtsInferenceJobForm struct {
@@ -71,6 +73,7 @@ type CreateModelArtsInferenceJobForm struct {
ModelName string `form:"model_name" binding:"Required"`
ModelVersion string `form:"model_version" binding:"Required"`
CkptName string `form:"ckpt_name" binding:"Required"`
SpecId int64 `form:"spec_id" binding:"Required"`
}

func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {


+ 1
- 1
modules/auth/wechat/cloudbrain.go View File

@@ -62,7 +62,7 @@ type CloudbrainStopMsg struct {

func (CloudbrainStopMsg) Data(ctx *TemplateContext) *DefaultWechatTemplate {
return &DefaultWechatTemplate{
First: TemplateValue{Value: setting.CloudbrainStoppedTitle},
First: TemplateValue{Value: fmt.Sprintf(setting.CloudbrainStoppedTitle, ctx.Cloudbrain.Status)},
Keyword1: TemplateValue{Value: ctx.Cloudbrain.DisplayJobName},
Keyword2: TemplateValue{Value: getJobTypeDisplayName(ctx.Cloudbrain.JobType)},
Keyword3: TemplateValue{Value: time.Unix(int64(ctx.Cloudbrain.CreatedUnix), 0).Format("2006-01-02 15:04:05")},


+ 18
- 76
modules/cloudbrain/cloudbrain.go View File

@@ -20,7 +20,7 @@ import (
const (
//Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
CommandBenchmark = `cd /benchmark && bash run_bk.sh >/model/benchmark-log.txt`
CodeMountPath = "/code"
DataSetMountPath = "/dataset"
ModelMountPath = "/model"
@@ -30,8 +30,8 @@ const (
Snn4imagenetMountPath = "/snn4imagenet"
BrainScoreMountPath = "/brainscore"
TaskInfoName = "/taskInfo"
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s'`
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s'`
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s' >/model/benchmark-log.txt`
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s' >/model/benchmark-log.txt`

SubTaskName = "task1"

@@ -61,7 +61,6 @@ type GenerateCloudBrainTaskReq struct {
Snn4ImageNetPath string
BrainScorePath string
JobType string
GpuQueue string
Description string
BranchName string
BootFile string
@@ -72,13 +71,13 @@ type GenerateCloudBrainTaskReq struct {
DatasetInfos map[string]models.DatasetInfo
BenchmarkTypeID int
BenchmarkChildTypeID int
ResourceSpecId int
ResultPath string
TrainUrl string
ModelName string
ModelVersion string
CkptName string
LabelName string
Spec *models.Specification
}

func GetCloudbrainDebugCommand() string {
@@ -227,50 +226,9 @@ func AdminOrImageCreaterRight(ctx *context.Context) {
}

func GenerateTask(req GenerateCloudBrainTaskReq) error {
var resourceSpec *models.ResourceSpec
var versionCount int
if req.JobType == string(models.JobTypeTrain) {
versionCount = 1
if TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs)
}
for _, spec := range TrainResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}
} else if req.JobType == string(models.JobTypeInference) {
if InferenceResourceSpecs == nil {
json.Unmarshal([]byte(setting.InferenceResourceSpecs), &InferenceResourceSpecs)
}
for _, spec := range InferenceResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}

} else {
if ResourceSpecs == nil {
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
}
for _, spec := range ResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}

}
//如果没有匹配到spec信息,尝试从专属资源池获取
if resourceSpec == nil && SpecialPools != nil {
resourceSpec = geMatchResourceSpec(req.JobType, req.GpuQueue, req.ResourceSpecId)
}

if resourceSpec == nil {
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"])
return errors.New("no such resourceSpec")
}

volumes := []models.Volume{
@@ -342,7 +300,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
jobResult, err := CreateJob(req.JobName, models.CreateJobParams{
JobName: req.JobName,
RetryCount: 1,
GpuType: req.GpuQueue,
GpuType: req.Spec.QueueCode,
Image: req.Image,
TaskRoles: []models.TaskRole{
{
@@ -350,10 +308,10 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
TaskNumber: 1,
MinSucceededTaskCount: 1,
MinFailedTaskCount: 1,
CPUNumber: resourceSpec.CpuNum,
GPUNumber: resourceSpec.GpuNum,
MemoryMB: resourceSpec.MemMiB,
ShmMB: resourceSpec.ShareMemMiB,
CPUNumber: req.Spec.CpuCores,
GPUNumber: req.Spec.AccCardsNum,
MemoryMB: int(req.Spec.MemGiB * 1024),
ShmMB: int(req.Spec.ShareMemGiB * 1024),
Command: req.Command,
NeedIBDevice: false,
IsMainRole: false,
@@ -384,8 +342,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
Type: models.TypeCloudBrainOne,
Uuid: req.Uuids,
Image: req.Image,
GpuQueue: req.GpuQueue,
ResourceSpecId: req.ResourceSpecId,
GpuQueue: req.Spec.QueueCode,
ComputeResource: models.GPUResource,
BenchmarkTypeID: req.BenchmarkTypeID,
BenchmarkChildTypeID: req.BenchmarkChildTypeID,
@@ -405,6 +362,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
CreatedUnix: createTime,
UpdatedUnix: createTime,
CommitID: req.CommitID,
Spec: req.Spec,
})

if err != nil {
@@ -416,6 +374,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
log.Error("GetCloudbrainByJobID failed: %v", err.Error())
return err
}

stringId := strconv.FormatInt(task.ID, 10)

if IsBenchmarkJob(req.JobType) {
@@ -447,25 +406,7 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy
func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) error {
jobName := task.JobName

var resourceSpec *models.ResourceSpec
if ResourceSpecs == nil {
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
}
for _, spec := range ResourceSpecs.ResourceSpec {
if task.ResourceSpecId == spec.Id {
resourceSpec = spec
}
}

//如果没有匹配到spec信息,尝试从专属资源池获取
if resourceSpec == nil && SpecialPools != nil {
resourceSpec = geMatchResourceSpec(task.JobType, task.GpuQueue, task.ResourceSpecId)
}

if resourceSpec == nil {
log.Error("no such resourceSpecId(%d)", task.ResourceSpecId, ctx.Data["MsgID"])
return errors.New("no such resourceSpec")
}
spec := task.Spec
var datasetInfos map[string]models.DatasetInfo
if task.Uuid != "" {
var err error
@@ -547,10 +488,10 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e
TaskNumber: 1,
MinSucceededTaskCount: 1,
MinFailedTaskCount: 1,
CPUNumber: resourceSpec.CpuNum,
GPUNumber: resourceSpec.GpuNum,
MemoryMB: resourceSpec.MemMiB,
ShmMB: resourceSpec.ShareMemMiB,
CPUNumber: spec.CpuCores,
GPUNumber: spec.AccCardsNum,
MemoryMB: int(spec.MemGiB * 1024),
ShmMB: int(spec.ShareMemGiB * 1024),
Command: GetCloudbrainDebugCommand(), //Command,
NeedIBDevice: false,
IsMainRole: false,
@@ -588,6 +529,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e
CreatedUnix: createTime,
UpdatedUnix: createTime,
BranchName: task.BranchName,
Spec: spec,
}

err = models.RestartCloudbrain(task, newTask)


+ 9
- 11
modules/grampus/grampus.go View File

@@ -30,18 +30,17 @@ const (

var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts
FlavorInfos *setting.StFlavorInfos
ImageInfos *setting.StImageInfosModelArts

SpecialPools *models.SpecialPools
)

type GenerateTrainJobReq struct {
JobName string
Command string
ResourceSpecId string
ImageUrl string //与image_id二选一,都有的情况下优先image_url
ImageId string
JobName string
Command string
ImageUrl string //与image_id二选一,都有的情况下优先image_url
ImageId string

DisplayJobName string
Uuid string
@@ -58,7 +57,6 @@ type GenerateTrainJobReq struct {
BranchName string
PreVersionId int64
PreVersionName string
FlavorName string
VersionCount int
EngineName string
TotalVersionCount int
@@ -66,6 +64,7 @@ type GenerateTrainJobReq struct {
ProcessType string
DatasetName string
Params string
Spec *models.Specification
}

func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
@@ -79,7 +78,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
{
Name: req.JobName,
Command: req.Command,
ResourceSpecId: req.ResourceSpecId,
ResourceSpecId: req.Spec.SourceSpecId,
ImageId: req.ImageId,
ImageUrl: req.ImageUrl,
CenterID: centerID,
@@ -114,15 +113,14 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
Parameters: req.Params,
BootFile: req.BootFile,
DataUrl: req.DataUrl,
FlavorCode: req.ResourceSpecId,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
EngineName: req.EngineName,
VersionCount: req.VersionCount,
TotalVersionCount: req.TotalVersionCount,
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: req.Spec,
})

if err != nil {


+ 44
- 23
modules/modelarts/modelarts.go View File

@@ -1,6 +1,7 @@
package modelarts

import (
"code.gitea.io/gitea/modules/modelarts_cd"
"encoding/json"
"errors"
"fmt"
@@ -68,10 +69,9 @@ const (

var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts
TrainFlavorInfos *Flavor
SpecialPools *models.SpecialPools
SpecialPools *models.SpecialPools
MultiNodeConfig *MultiNodes
)

type GenerateTrainJobReq struct {
@@ -84,7 +84,6 @@ type GenerateTrainJobReq struct {
BootFileUrl string
DataUrl string
TrainUrl string
FlavorCode string
LogUrl string
PoolID string
WorkServerNumber int
@@ -96,6 +95,7 @@ type GenerateTrainJobReq struct {
BranchName string
PreVersionId int64
PreVersionName string
FlavorCode string
FlavorName string
VersionCount int
EngineName string
@@ -103,6 +103,7 @@ type GenerateTrainJobReq struct {
UserImageUrl string
UserCommand string
DatasetName string
Spec *models.Specification
}

type GenerateInferenceJobReq struct {
@@ -115,7 +116,6 @@ type GenerateInferenceJobReq struct {
BootFileUrl string
DataUrl string
TrainUrl string
FlavorCode string
LogUrl string
PoolID string
WorkServerNumber int
@@ -134,6 +134,7 @@ type GenerateInferenceJobReq struct {
ModelVersion string
CkptName string
ResultUrl string
Spec *models.Specification
DatasetName string
}

@@ -166,6 +167,14 @@ type ResourcePool struct {
} `json:"resource_pool"`
}

// MultiNodes is the decoded form of the multi-node setting: a list of
// per-organization entries read from the JSON key "multinode".
type MultiNodes struct{
Info []OrgMultiNode `json:"multinode"`
}
// OrgMultiNode is one entry of MultiNodes: an organization name (JSON "org")
// together with a list of integers (JSON "node").
// NOTE(review): presumably the node counts that organization may request — confirm against callers.
type OrgMultiNode struct{
Org string `json:"org"`
Node []int `json:"node"`
}

// type Parameter struct {
// Label string `json:"label"`
// Value string `json:"value"`
@@ -257,7 +266,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin
return nil
}

func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error {
func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error {
if poolInfos == nil {
json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
}
@@ -271,7 +280,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
jobResult, err := createNotebook2(models.CreateNotebook2Params{
JobName: jobName,
Description: description,
Flavor: flavor,
Flavor: spec.SourceSpecId,
Duration: autoStopDurationMs,
ImageID: imageId,
PoolID: poolInfos.PoolInfo[0].PoolId,
@@ -308,7 +317,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
RepoID: ctx.Repo.Repository.ID,
JobID: jobResult.ID,
JobName: jobName,
FlavorCode: flavor,
FlavorCode: spec.SourceSpecId,
DisplayJobName: displayJobName,
JobType: string(models.JobTypeDebug),
Type: models.TypeCloudBrainTwo,
@@ -318,6 +327,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
Description: description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: spec,
}

err = models.CreateCloudbrain(task)
@@ -348,7 +358,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
Code: req.Spec.SourceSpecId,
},
Parameter: req.Parameters,
UserImageUrl: req.UserImageUrl,
@@ -370,7 +380,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
Code: req.Spec.SourceSpecId,
},
Parameter: req.Parameters,
},
@@ -419,7 +429,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
BootFile: req.BootFile,
DataUrl: req.DataUrl,
LogUrl: req.LogUrl,
FlavorCode: req.FlavorCode,
FlavorCode: req.Spec.SourceSpecId,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
@@ -428,6 +438,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
TotalVersionCount: req.TotalVersionCount,
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: req.Spec,
})

if createErr != nil {
@@ -479,7 +490,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
LogUrl: req.LogUrl,
PoolID: req.PoolID,
Flavor: models.Flavor{
Code: req.FlavorCode,
Code: req.Spec.SourceSpecId,
},
Parameter: req.Parameters,
PreVersionId: req.PreVersionId,
@@ -500,7 +511,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
LogUrl: req.LogUrl,
PoolID: req.PoolID,
Flavor: models.Flavor{
Code: req.FlavorCode,
Code: req.Spec.SourceSpecId,
},
Parameter: req.Parameters,
PreVersionId: req.PreVersionId,
@@ -567,7 +578,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
DataUrl: req.DataUrl,
LogUrl: req.LogUrl,
PreVersionId: req.PreVersionId,
FlavorCode: req.FlavorCode,
FlavorCode: req.Spec.SourceSpecId,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
@@ -576,6 +587,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
VersionCount: VersionListCount + 1,
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: req.Spec,
})
if createErr != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error())
@@ -666,7 +678,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
Code: req.Spec.SourceSpecId,
},
Parameter: req.Parameters,
},
@@ -718,7 +730,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
BootFile: req.BootFile,
DataUrl: req.DataUrl,
LogUrl: req.LogUrl,
FlavorCode: req.FlavorCode,
FlavorCode: req.Spec.SourceSpecId,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
@@ -734,6 +746,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
ResultUrl: req.ResultUrl,
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: req.Spec,
})

if err != nil {
@@ -748,11 +761,7 @@ func GetNotebookImageName(imageId string) (string, error) {
var validImage = false
var imageName = ""

if ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos)
}

for _, imageInfo := range ImageInfos.ImageInfo {
for _, imageInfo := range setting.StImageInfos.ImageInfo {
if imageInfo.Id == imageId {
validImage = true
imageName = imageInfo.Value
@@ -773,6 +782,13 @@ func InitSpecialPool() {
}
}

// InitMultiNode decodes setting.ModelArtsMultiNode (a JSON document) into
// MultiNodeConfig. It does nothing when the configuration has already been
// loaded or when the setting is empty. A decoding failure is logged instead of
// being silently discarded, so a malformed setting is visible to operators.
func InitMultiNode() {
	if MultiNodeConfig != nil || setting.ModelArtsMultiNode == "" {
		return
	}
	if err := json.Unmarshal([]byte(setting.ModelArtsMultiNode), &MultiNodeConfig); err != nil {
		log.Error("json.Unmarshal ModelArtsMultiNode failed: %v", err)
	}
}

func HandleTrainJobInfo(task *models.Cloudbrain) error {

result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
@@ -809,8 +825,13 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error {
}

func HandleNotebookInfo(task *models.Cloudbrain) error {

result, err := GetNotebook2(task.JobID)
var result *models.GetNotebook2Result
var err error
if task.Type == models.TypeCloudBrainTwo {
result, err = GetNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
result, err = modelarts_cd.GetNotebook(task.JobID)
}
if err != nil {
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
return err


+ 215
- 0
modules/modelarts_cd/modelarts.go View File

@@ -0,0 +1,215 @@
package modelarts_cd

import (
"errors"
"strconv"
"strings"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
)

// Constants of the modelarts_cd package.
const (
// notebook
storageTypeOBS = "obs"
autoStopDuration = 4 * 60 * 60
// autoStopDurationMs is sent as the notebook Duration on creation and as the
// "duration" query parameter by ManageNotebook (4 hours in milliseconds).
autoStopDurationMs = 4 * 60 * 60 * 1000
MORDELART_USER_IMAGE_ENGINE_ID = -1
DataSetMountPath = "/home/ma-user/work"
NotebookEnv = "Python3"
NotebookType = "Ascend"
FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

// train-job
CodePath = "/code/"
OutputPath = "/output/"
ResultPath = "/result/"
LogPath = "/log/"
JobPath = "/job/"
OrderDesc = "desc" // query downward
OrderAsc = "asc" // query upward
Lines = 500
TrainUrl = "train_url"
DataUrl = "data_url"
MultiDataUrl = "multi_data_url"
ResultUrl = "result_url"
CkptUrl = "ckpt_url"
DeviceTarget = "device_target"
Ascend = "Ascend"
PerPage = 10
IsLatestVersion = "1"
NotLatestVersion = "0"
VersionCountOne = 1

SortByCreateTime = "create_time"
ConfigTypeCustom = "custom"
TotalVersionCount = 1
)

var ()

// VersionInfo is a JSON-decodable list of versions (key "version"); each
// entry carries an integer id, a value and a url.
type VersionInfo struct {
Version []struct {
ID int `json:"id"`
Value string `json:"value"`
Url string `json:"url"`
} `json:"version"`
}

// Flavor is a JSON-decodable list of flavors (key "flavor"); each entry
// pairs a code with a value.
type Flavor struct {
Info []struct {
Code string `json:"code"`
Value string `json:"value"`
} `json:"flavor"`
}

// Engine is a JSON-decodable list of engines (key "engine"); each entry
// pairs an integer id with a value.
type Engine struct {
Info []struct {
ID int `json:"id"`
Value string `json:"value"`
} `json:"engine"`
}

// ResourcePool is a JSON-decodable list of resource pools (key
// "resource_pool"); each entry pairs a string id with a value.
type ResourcePool struct {
Info []struct {
ID string `json:"id"`
Value string `json:"value"`
} `json:"resource_pool"`
}

// Parameters is a JSON-decodable list of label/value pairs (key "parameter").
type Parameters struct {
Parameter []struct {
Label string `json:"label"`
Value string `json:"value"`
} `json:"parameter"`
}

// GenerateNotebook creates a debug notebook through createNotebook and records
// it as a models.Cloudbrain task of type models.TypeCDCenter.
//
// The image id is first resolved to its name with GetNotebookImageName. The
// flavor sent to the remote API and stored on the task is spec.SourceSpecId.
// When creation fails with an error whose text starts with UnknownErrorPrefix,
// a temporary models.CloudbrainTemp record is inserted before the creation
// error is returned. On success the task is persisted and a
// models.ActionCreateDebugNPUTask notification is emitted.
func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error {
	imageName, err := GetNotebookImageName(imageId)
	if err != nil {
		log.Error("GetNotebookImageName failed: %v", err.Error())
		return err
	}

	now := timeutil.TimeStampNow()
	params := models.CreateNotebookWithoutPoolParams{
		JobName:     jobName,
		Description: description,
		Flavor:      spec.SourceSpecId,
		Duration:    autoStopDurationMs,
		ImageID:     imageId,
		Feature:     models.NotebookFeature,
		Volume: models.VolumeReq{
			Capacity:  setting.Capacity,
			Category:  models.EVSCategory,
			Ownership: models.ManagedOwnership,
		},
		WorkspaceID: "0",
	}

	created, err := createNotebook(params)
	if err != nil {
		log.Error("createNotebook failed: %v", err.Error())
		if !strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
			return err
		}

		// The outcome of the remote call is unknown: keep a placeholder record.
		log.Info("(%s)unknown error, set temp status", displayJobName)
		placeholder := &models.CloudbrainTemp{
			JobID:     models.TempJobId,
			VersionID: models.TempVersionId,
			Status:    models.TempJobStatus,
			Type:      models.TypeCDCenter,
			JobName:   jobName,
			JobType:   string(models.JobTypeDebug),
		}
		if errTemp := models.InsertCloudbrainTemp(placeholder); errTemp != nil {
			log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
			return errTemp
		}
		return err
	}

	task := &models.Cloudbrain{
		Status:          created.Status,
		UserID:          ctx.User.ID,
		RepoID:          ctx.Repo.Repository.ID,
		JobID:           created.ID,
		JobName:         jobName,
		FlavorCode:      spec.SourceSpecId,
		DisplayJobName:  displayJobName,
		JobType:         string(models.JobTypeDebug),
		Type:            models.TypeCDCenter,
		Uuid:            uuid,
		ComputeResource: models.NPUResource,
		Image:           imageName,
		Description:     description,
		CreatedUnix:     now,
		UpdatedUnix:     now,
		Spec:            spec,
	}
	if err = models.CreateCloudbrain(task); err != nil {
		return err
	}

	taskID := strconv.FormatInt(task.ID, 10)
	notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, taskID, displayJobName, models.ActionCreateDebugNPUTask)
	return nil
}

// GetNotebookImageName looks imageId up in setting.StImageInfos and returns
// the Value of the matching entry. When several entries share the id, the last
// one wins.
//
// An error is returned when no entry matches. That includes the case where
// setting.StImageInfos was never populated (its loader ignores JSON decoding
// failures, leaving the pointer nil), which previously caused a nil-pointer
// panic here. Nil entries in the list are skipped for the same reason.
func GetNotebookImageName(imageId string) (string, error) {
	var validImage = false
	var imageName = ""

	if setting.StImageInfos != nil {
		for _, imageInfo := range setting.StImageInfos.ImageInfo {
			if imageInfo != nil && imageInfo.Id == imageId {
				validImage = true
				imageName = imageInfo.Value
			}
		}
	}

	if !validImage {
		log.Error("the image id(%s) is invalid", imageId)
		return imageName, errors.New("the image id is invalid")
	}

	return imageName, nil
}

/*
func HandleNotebookInfo(task *models.Cloudbrain) error {

result, err := GetNotebook(task.JobID)
if err != nil {
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
return err
}

if result != nil {
oldStatus := task.Status
task.Status = result.Status
if task.StartTime == 0 && result.Lease.UpdateTime > 0 {
task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
}
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.CorrectCreateUnix()
task.ComputeAndSetDuration()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
if task.FlavorCode == "" {
task.FlavorCode = result.Flavor
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err)
return err
}
}

return nil
}

*/

+ 220
- 0
modules/modelarts_cd/resty.go View File

@@ -0,0 +1,220 @@
package modelarts_cd

import (
"bytes"
"code.gitea.io/gitea/modules/modelarts_gateway/core"
"crypto/tls"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"strconv"
"time"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)

// Package-level state of the REST client.
var (
// httpClient is created on first use by getHttpClient and then reused.
httpClient *http.Client
// NOTE(review): HOST and TOKEN are not referenced in the visible part of this file — confirm they are still needed.
HOST string
TOKEN string
)

// Error codes and URL fragments used when talking to the notebook REST API.
const (
// errorCodeExceedLimit is the error code createNotebook maps to a quota-exceeded message.
errorCodeExceedLimit = "ModelArts.0118"

// notebook 2.0: path segment appended after "/v1/<project id>"
urlNotebook2 = "/notebooks"

// error codes
modelartsIllegalToken = "ModelArts.6401"
NotebookNotFound = "ModelArts.6404"
NotebookNoPermission = "ModelArts.6407"
NotebookInvalid = "ModelArts.6400"
// UnknownErrorPrefix is matched against error texts by GenerateNotebook to
// detect a creation whose outcome is unknown.
UnknownErrorPrefix = "UNKNOWN:"
)

// getHttpClient returns the package-wide HTTP client, building it on the first
// call with a 30 second timeout.
//
// NOTE(review): the transport sets InsecureSkipVerify, so the server's TLS
// certificate is not verified — confirm this is intended for the endpoint.
// NOTE(review): the lazy initialization is not synchronized.
func getHttpClient() *http.Client {
	if httpClient != nil {
		return httpClient
	}

	transport := &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}
	httpClient = &http.Client{
		Timeout:   30 * time.Second,
		Transport: transport,
	}
	return httpClient
}

// GetNotebook fetches the notebook identified by jobID with a signed GET
// request to "<endpoint>/v1/<project id>/notebooks/<jobID>" and decodes the
// JSON response.
//
// It returns an error when the request cannot be built, signed or sent, when
// the body cannot be read or decoded, or when the decoded response carries a
// non-empty ErrorCode. The returned pointer is never nil; on error it holds
// whatever was decoded so far.
func GetNotebook(jobID string) (*models.GetNotebook2Result, error) {
	var result models.GetNotebook2Result

	client := getHttpClient()
	s := core.Signer{
		Key:    setting.ModelartsCD.AccessKey,
		Secret: setting.ModelartsCD.SecretKey,
	}
	r, err := http.NewRequest(http.MethodGet,
		setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID,
		nil)
	if err != nil {
		// Without this check a malformed URL would leave r nil and panic below.
		log.Error("http.NewRequest failed: %s", err.Error())
		return &result, fmt.Errorf("http.NewRequest failed: %s", err.Error())
	}

	r.Header.Add("content-type", "application/json")
	if err = s.Sign(r); err != nil {
		log.Error("Sign failed: %s", err.Error())
		return &result, fmt.Errorf("Sign failed: %s", err.Error())
	}

	resp, err := client.Do(r)
	if err != nil {
		log.Error("client.Do failed: %s", err.Error())
		return &result, fmt.Errorf("client.Do failed: %s", err.Error())
	}

	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Error("ioutil.ReadAll failed: %s", err.Error())
		return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
	}

	err = json.Unmarshal(body, &result)
	if err != nil {
		log.Error("json.Unmarshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
	}

	if len(result.ErrorCode) != 0 {
		log.Error("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
		return &result, fmt.Errorf("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
	}

	return &result, nil
}

// ManageNotebook applies param.Action to the notebook identified by jobID with
// a signed POST to
// "<endpoint>/v1/<project id>/notebooks/<jobID>/<action>?duration=<autoStopDurationMs>"
// and decodes the JSON response.
//
// It returns an error when the request cannot be built, signed or sent, when
// the body cannot be read or decoded, or when the decoded response carries a
// non-empty ErrorCode. The returned pointer is never nil.
func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
	var result models.NotebookActionResult

	client := getHttpClient()
	s := core.Signer{
		Key:    setting.ModelartsCD.AccessKey,
		Secret: setting.ModelartsCD.SecretKey,
	}
	r, err := http.NewRequest(http.MethodPost,
		setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID+"/"+param.Action+"?duration="+strconv.Itoa(autoStopDurationMs),
		nil)
	if err != nil {
		// Without this check a malformed URL would leave r nil and panic below.
		log.Error("http.NewRequest failed: %s", err.Error())
		return &result, fmt.Errorf("http.NewRequest failed: %s", err.Error())
	}

	r.Header.Add("content-type", "application/json")
	if err = s.Sign(r); err != nil {
		log.Error("Sign failed: %s", err.Error())
		return &result, fmt.Errorf("Sign failed: %s", err.Error())
	}

	resp, err := client.Do(r)
	if err != nil {
		log.Error("client.Do failed: %s", err.Error())
		return &result, fmt.Errorf("client.Do failed: %s", err.Error())
	}

	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Error("ioutil.ReadAll failed: %s", err.Error())
		return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
	}

	err = json.Unmarshal(body, &result)
	if err != nil {
		log.Error("json.Unmarshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
	}

	if len(result.ErrorCode) != 0 {
		log.Error("ManageNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
		return &result, fmt.Errorf("ManageNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
	}

	return &result, nil
}

// DelNotebook deletes the notebook identified by jobID with a signed DELETE
// request to "<endpoint>/v1/<project id>/notebooks/<jobID>" and decodes the
// JSON response.
//
// It returns an error when the request cannot be built, signed or sent, when
// the body cannot be read or decoded, or when the decoded response carries a
// non-empty ErrorCode. The returned pointer is never nil.
func DelNotebook(jobID string) (*models.NotebookDelResult, error) {
	var result models.NotebookDelResult

	client := getHttpClient()
	s := core.Signer{
		Key:    setting.ModelartsCD.AccessKey,
		Secret: setting.ModelartsCD.SecretKey,
	}

	r, err := http.NewRequest(http.MethodDelete,
		setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID,
		nil)
	if err != nil {
		// Without this check a malformed URL would leave r nil and panic below.
		log.Error("http.NewRequest failed: %s", err.Error())
		return &result, fmt.Errorf("http.NewRequest failed: %s", err.Error())
	}

	r.Header.Add("content-type", "application/json")
	if err = s.Sign(r); err != nil {
		log.Error("Sign failed: %s", err.Error())
		return &result, fmt.Errorf("Sign failed: %s", err.Error())
	}

	resp, err := client.Do(r)
	if err != nil {
		log.Error("client.Do failed: %s", err.Error())
		return &result, fmt.Errorf("client.Do failed: %s", err.Error())
	}

	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Error("ioutil.ReadAll failed: %s", err.Error())
		return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
	}

	err = json.Unmarshal(body, &result)
	if err != nil {
		log.Error("json.Unmarshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
	}

	if len(result.ErrorCode) != 0 {
		log.Error("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
		return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
	}

	return &result, nil
}

// createNotebook sends createJobParams as a JSON body in a signed POST to
// "<endpoint>/v1/<project id>/notebooks" and decodes the JSON response.
//
// It returns an error when the parameters cannot be encoded, when the request
// cannot be built, signed or sent, when the body cannot be read or decoded, or
// when the decoded response carries a non-empty ErrorCode. For the error code
// errorCodeExceedLimit the message is replaced with a fixed quota-exceeded
// text. The returned pointer is never nil.
func createNotebook(createJobParams models.CreateNotebookWithoutPoolParams) (*models.CreateNotebookResult, error) {
	var result models.CreateNotebookResult
	client := getHttpClient()
	s := core.Signer{
		Key:    setting.ModelartsCD.AccessKey,
		Secret: setting.ModelartsCD.SecretKey,
	}

	req, err := json.Marshal(createJobParams)
	if err != nil {
		log.Error("json.Marshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Marshal failed: %s", err.Error())
	}
	r, err := http.NewRequest(http.MethodPost,
		setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2,
		ioutil.NopCloser(bytes.NewBuffer(req)))
	if err != nil {
		// Without this check a malformed URL would leave r nil and panic below.
		log.Error("http.NewRequest failed: %s", err.Error())
		return &result, fmt.Errorf("http.NewRequest failed: %s", err.Error())
	}

	r.Header.Add("content-type", "application/json")
	if err = s.Sign(r); err != nil {
		log.Error("Sign failed: %s", err.Error())
		return &result, fmt.Errorf("Sign failed: %s", err.Error())
	}

	resp, err := client.Do(r)
	if err != nil {
		log.Error("client.Do failed: %s", err.Error())
		return &result, fmt.Errorf("client.Do failed: %s", err.Error())
	}

	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Error("ioutil.ReadAll failed: %s", err.Error())
		return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
	}

	err = json.Unmarshal(body, &result)
	if err != nil {
		log.Error("json.Unmarshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
	}

	if len(result.ErrorCode) != 0 {
		log.Error("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
		if result.ErrorCode == errorCodeExceedLimit {
			result.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
		}
		return &result, fmt.Errorf("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
	}

	return &result, nil
}

+ 42
- 0
modules/modelarts_gateway/core/escape.go View File

@@ -0,0 +1,42 @@
// based on https://github.com/golang/go/blob/master/src/net/url/url.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package core

// shouldEscape reports whether byte c has to be percent-encoded. ASCII
// letters, ASCII digits and the characters '_', '-', '~' and '.' are left
// untouched; every other byte is escaped.
func shouldEscape(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z', '0' <= c && c <= '9':
		return false
	case c == '_', c == '-', c == '~', c == '.':
		return false
	}
	return true
}

// escape returns s with every byte for which shouldEscape is true replaced by
// "%XX", where XX is the byte value in upper-case hexadecimal. When nothing
// needs escaping, s is returned unchanged.
func escape(s string) string {
	const upperHex = "0123456789ABCDEF"

	// First pass: count the bytes to encode so the output can be sized once.
	needed := 0
	for _, b := range []byte(s) {
		if shouldEscape(b) {
			needed++
		}
	}
	if needed == 0 {
		return s
	}

	out := make([]byte, 0, len(s)+2*needed)
	for _, b := range []byte(s) {
		if shouldEscape(b) {
			out = append(out, '%', upperHex[b>>4], upperHex[b&15])
		} else {
			out = append(out, b)
		}
	}
	return string(out)
}

+ 208
- 0
modules/modelarts_gateway/core/signer.go View File

@@ -0,0 +1,208 @@
// HWS API Gateway Signature
// based on https://github.com/datastream/aws/blob/master/signv4.go
// Copyright (c) 2014, Xianjie

package core

import (
"bytes"
"crypto/hmac"
"crypto/sha256"
"fmt"
"io/ioutil"
"net/http"
"sort"
"strings"
"time"
)

// Constants used by the request signer.
const (
// BasicDateFormat is the time layout used for the X-Sdk-Date header and in the string to sign.
BasicDateFormat = "20060102T150405Z"
// Algorithm is the first line of the string to sign and the scheme name in the Authorization header.
Algorithm = "SDK-HMAC-SHA256"
// HeaderXDate names the header carrying the signing time.
HeaderXDate = "X-Sdk-Date"
// HeaderHost is compared case-insensitively so the host header value is taken from Request.Host.
HeaderHost = "host"
// HeaderAuthorization names the header that receives the signature.
HeaderAuthorization = "Authorization"
// HeaderContentSha256, when set on a request, supplies the payload hash instead of hashing the body.
HeaderContentSha256 = "X-Sdk-Content-Sha256"
)

// hmacsha256 returns the raw HMAC-SHA256 digest of data keyed with key.
func hmacsha256(key []byte, data string) ([]byte, error) {
	mac := hmac.New(sha256.New, key)
	_, err := mac.Write([]byte(data))
	if err != nil {
		return nil, err
	}
	return mac.Sum(nil), nil
}

// CanonicalRequest builds the canonical form of r that gets hashed into the
// string to sign:
//
//	HTTPRequestMethod + '\n' +
//	CanonicalURI + '\n' +
//	CanonicalQueryString + '\n' +
//	CanonicalHeaders + '\n' +
//	SignedHeaders + '\n' +
//	HexEncode(Hash(RequestPayload))
//
// The payload hash is taken from the X-Sdk-Content-Sha256 header when that
// header is non-empty; otherwise the body is read (and restored) through
// RequestPayload and hashed with HexEncodeSHA256Hash.
func CanonicalRequest(r *http.Request, signedHeaders []string) (string, error) {
	payloadHash := r.Header.Get(HeaderContentSha256)
	if payloadHash == "" {
		payload, err := RequestPayload(r)
		if err != nil {
			return "", err
		}
		payloadHash, err = HexEncodeSHA256Hash(payload)
		if err != nil {
			return "", err
		}
	}

	// The elements are evaluated in order; CanonicalQueryString also rewrites
	// r.URL.RawQuery as a side effect.
	parts := []string{
		r.Method,
		CanonicalURI(r),
		CanonicalQueryString(r),
		CanonicalHeaders(r, signedHeaders),
		strings.Join(signedHeaders, ";"),
		payloadHash,
	}
	return strings.Join(parts, "\n"), nil
}

// CanonicalURI returns the request path with every "/"-separated segment
// passed through escape and with a trailing "/" appended when the result does
// not already end in one (an empty path becomes "/").
func CanonicalURI(r *http.Request) string {
	segments := strings.Split(r.URL.Path, "/")
	for i, segment := range segments {
		segments[i] = escape(segment)
	}

	canonical := strings.Join(segments, "/")
	if !strings.HasSuffix(canonical, "/") {
		canonical += "/"
	}
	return canonical
}

// CanonicalQueryString returns the query of r as "key=value" pairs joined by
// "&", with keys sorted, the values of each key sorted, and both keys and
// values passed through escape.
//
// Side effect: r.URL.RawQuery is overwritten with the returned string.
func CanonicalQueryString(r *http.Request) string {
	params := r.URL.Query()

	names := make([]string, 0, len(params))
	for name := range params {
		names = append(names, name)
	}
	sort.Strings(names)

	var pairs []string
	for _, name := range names {
		values := params[name]
		sort.Strings(values)
		encodedName := escape(name)
		for _, value := range values {
			pairs = append(pairs, encodedName+"="+escape(value))
		}
	}

	canonical := strings.Join(pairs, "&")
	r.URL.RawQuery = canonical
	return canonical
}

// CanonicalHeaders returns one "name:value" line per value of each header
// listed in signerHeaders, in the order given, followed by a final newline.
// Header names are matched in lower case, values are sorted per header and
// trimmed of surrounding whitespace. For the host header the value is taken
// from r.Host rather than from r.Header.
//
// Note that sorting happens on the value slices held by r.Header, so the
// request's header values end up sorted as well.
func CanonicalHeaders(r *http.Request, signerHeaders []string) string {
	lowered := make(map[string][]string, len(r.Header))
	for name, values := range r.Header {
		lowered[strings.ToLower(name)] = values
	}

	var lines []string
	for _, name := range signerHeaders {
		values := lowered[name]
		if strings.EqualFold(name, HeaderHost) {
			values = []string{r.Host}
		}
		sort.Strings(values)
		for _, value := range values {
			lines = append(lines, name+":"+strings.TrimSpace(value))
		}
	}
	return strings.Join(lines, "\n") + "\n"
}

// SignedHeaders returns the lower-cased names of all headers present on r,
// sorted alphabetically. It returns nil when r has no headers.
func SignedHeaders(r *http.Request) []string {
	if len(r.Header) == 0 {
		return nil
	}

	names := make([]string, 0, len(r.Header))
	for name := range r.Header {
		names = append(names, strings.ToLower(name))
	}
	sort.Strings(names)
	return names
}

// RequestPayload returns the full body of r. A request without a body yields
// an empty slice. After a successful read the body is replaced with a fresh
// reader over the same bytes, so the request can still be sent.
func RequestPayload(r *http.Request) ([]byte, error) {
	if r.Body == nil {
		return []byte(""), nil
	}

	payload, readErr := ioutil.ReadAll(r.Body)
	if readErr != nil {
		return []byte(""), readErr
	}

	// The original body is consumed; put an equivalent one back.
	r.Body = ioutil.NopCloser(bytes.NewBuffer(payload))
	return payload, nil
}

// Create a "String to Sign".
func StringToSign(canonicalRequest string, t time.Time) (string, error) {
hash := sha256.New()
_, err := hash.Write([]byte(canonicalRequest))
if err != nil {
return "", err
}
return fmt.Sprintf("%s\n%s\n%x",
Algorithm, t.UTC().Format(BasicDateFormat), hash.Sum(nil)), nil
}

// SignStringToSign returns the hex-encoded HMAC-SHA256 of stringToSign keyed
// with signingKey, along with any error reported by hmacsha256.
func SignStringToSign(stringToSign string, signingKey []byte) (string, error) {
	mac, err := hmacsha256(signingKey, stringToSign)
	signature := fmt.Sprintf("%x", mac)
	return signature, err
}

// HexEncodeSHA256Hash returns the lower-case hexadecimal SHA-256 digest of
// body; a nil body is hashed as empty input. The returned error is always nil.
func HexEncodeSHA256Hash(body []byte) (string, error) {
	digest := sha256.Sum256(body)
	return fmt.Sprintf("%x", digest[:]), nil
}

// Get the finalized value for the "Authorization" header. The signature parameter is the output from SignStringToSign
func AuthHeaderValue(signature, accessKey string, signedHeaders []string) string {
return fmt.Sprintf("%s Access=%s, SignedHeaders=%s, Signature=%s", Algorithm, accessKey, strings.Join(signedHeaders, ";"), signature)
}

// Signer holds the credentials used to sign a request: Key is written into
// the Authorization header as the access key, Secret is the HMAC signing key.
type Signer struct {
Key string
Secret string
}

// Sign computes the signature of r and stores it in the Authorization header.
//
// The signing time is taken from the X-Sdk-Date header when it is present and
// parses with BasicDateFormat; otherwise the current time is used and written
// to that header. The date header is settled before the signed header list is
// collected, so it is part of the signature. Any error from building the
// canonical request, the string to sign or the signature is returned.
func (s *Signer) Sign(r *http.Request) error {
	var signTime time.Time
	reuseDate := false
	if raw := r.Header.Get(HeaderXDate); raw != "" {
		if parsed, parseErr := time.Parse(BasicDateFormat, raw); parseErr == nil {
			signTime, reuseDate = parsed, true
		}
	}
	if !reuseDate {
		signTime = time.Now()
		r.Header.Set(HeaderXDate, signTime.UTC().Format(BasicDateFormat))
	}

	headers := SignedHeaders(r)
	canonical, err := CanonicalRequest(r, headers)
	if err != nil {
		return err
	}
	toSign, err := StringToSign(canonical, signTime)
	if err != nil {
		return err
	}
	signature, err := SignStringToSign(toSign, []byte(s.Secret))
	if err != nil {
		return err
	}

	r.Header.Set(HeaderAuthorization, AuthHeaderValue(signature, s.Key, headers))
	return nil
}

+ 76
- 8
modules/setting/setting.go View File

@@ -75,6 +75,26 @@ type C2NetSqInfos struct {
C2NetSqInfo []*C2NetSequenceInfo `json:"sequence"`
}

// StFlavorInfos is the decoded form of a flavor-info JSON setting: a list of
// FlavorInfo entries under the key "flavor_info".
type StFlavorInfos struct {
FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

// FlavorInfo is one flavor entry: an integer id, a value and a description.
type FlavorInfo struct {
Id int `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

// StImageInfosModelArts is the decoded form of an image-info JSON setting: a
// list of ImageInfoModelArts entries under the key "image_info".
type StImageInfosModelArts struct {
ImageInfo []*ImageInfoModelArts `json:"image_info"`
}

// ImageInfoModelArts is one image entry: a string id, a value and a
// description.
type ImageInfoModelArts struct {
Id string `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

var (
// AppVer settings
AppVer string
@@ -535,18 +555,31 @@ var (
AllowedOrg string
ProfileID string
PoolInfos string
Flavor string
FlavorInfos string
DebugHost string
ImageInfos string
Capacity int
MaxTempQueryTimes int
StFlavorInfo *StFlavorInfos
StImageInfos *StImageInfosModelArts
//train-job
ResourcePools string
Engines string
EngineVersions string
FlavorInfos string
TrainJobFLAVORINFOS string
ModelArtsSpecialPools string
ModelArtsMultiNode string

// modelarts-cd config
ModelartsCD = struct {
Enabled bool
EndPoint string
ProjectID string
AccessKey string
SecretKey string
ImageInfos string
FlavorInfos string
}{}

//grampus config
Grampus = struct {
@@ -1422,9 +1455,8 @@ func NewContext() {
AllowedOrg = sec.Key("ORGANIZATION").MustString("")
ProfileID = sec.Key("PROFILE_ID").MustString("")
PoolInfos = sec.Key("POOL_INFOS").MustString("")
Flavor = sec.Key("FLAVOR").MustString("")
ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
Capacity = sec.Key("IMAGE_INFOS").MustInt(100)
Capacity = sec.Key("CAPACITY").MustInt(100)
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30)
ResourcePools = sec.Key("Resource_Pools").MustString("")
Engines = sec.Key("Engines").MustString("")
@@ -1432,6 +1464,7 @@ func NewContext() {
FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("")
TrainJobFLAVORINFOS = sec.Key("TrainJob_FLAVOR_INFOS").MustString("")
ModelArtsSpecialPools = sec.Key("SPECIAL_POOL").MustString("")
ModelArtsMultiNode=sec.Key("MULTI_NODE").MustString("")

sec = Cfg.Section("elk")
ElkUrl = sec.Key("ELKURL").MustString("")
@@ -1460,7 +1493,7 @@ func NewContext() {
CloudbrainStartedRemark = sec.Key("CLOUDBRAIN_STARTED_REMARK").MustString("感谢您的耐心等待。")
CloudbrainStoppedTemplateId = sec.Key("CLOUDBRAIN_STOPPED_TEMPLATE_ID").MustString("")
CloudbrainStoppedNotifyList = strings.Split(sec.Key("CLOUDBRAIN_STOPPED_NOTIFY_LIST").MustString("TRAIN"), ",")
CloudbrainStoppedTitle = sec.Key("CLOUDBRAIN_STOPPED_TITLE").MustString("您好,您申请的算力资源已结束使用,任务已完成运行,请您关注运行结果")
CloudbrainStoppedTitle = sec.Key("CLOUDBRAIN_STOPPED_TITLE").MustString("您好,您申请的算力资源已结束使用,任务已完成运行,状态为%s,请您关注运行结果")
CloudbrainStoppedRemark = sec.Key("CLOUDBRAIN_STOPPED_REMARK").MustString("感谢您的耐心等待。")

SetRadarMapConfig()
@@ -1472,8 +1505,8 @@ func NewContext() {
Course.OrgName = sec.Key("org_name").MustString("")
Course.TeamName = sec.Key("team_name").MustString("")

GetGrampusConfig()
getGrampusConfig()
getModelartsCDConfig()
getModelConvertConfig()
}

@@ -1496,7 +1529,22 @@ func getModelConvertConfig() {
ModelConvert.NPU_TENSORFLOW_IMAGE_ID = sec.Key("NPU_TENSORFLOW_IMAGE_ID").MustInt(35)
}

func GetGrampusConfig() {
// getModelartsCDConfig fills ModelartsCD from the "modelarts-cd" section of
// Cfg and then loads the notebook image and flavor lists, which depend on
// ModelartsCD.Enabled.
func getModelartsCDConfig() {
	section := Cfg.Section("modelarts-cd")
	text := func(name, fallback string) string {
		return section.Key(name).MustString(fallback)
	}

	ModelartsCD.Enabled = section.Key("ENABLED").MustBool(false)
	ModelartsCD.EndPoint = text("ENDPOINT", "https://modelarts.cn-southwest-228.cdzs.cn")
	ModelartsCD.ProjectID = text("PROJECT_ID", "")
	ModelartsCD.AccessKey = text("ACCESS_KEY", "")
	ModelartsCD.SecretKey = text("SECRET_KEY", "")
	ModelartsCD.ImageInfos = text("IMAGE_INFOS", "")
	ModelartsCD.FlavorInfos = text("FLAVOR_INFOS", "")

	getNotebookImageInfos()
	getNotebookFlavorInfos()
}

func getGrampusConfig() {
sec := Cfg.Section("grampus")

Grampus.Env = sec.Key("ENV").MustString("TEST")
@@ -1630,6 +1678,26 @@ func ensureLFSDirectory() {
}
}

// getNotebookImageInfos fills StImageInfos once: when it is still nil, the
// JSON text is taken from ModelartsCD.ImageInfos if ModelartsCD is enabled,
// otherwise from ImageInfos. Decoding errors are ignored.
func getNotebookImageInfos() {
	if StImageInfos != nil {
		return
	}
	payload := []byte(ImageInfos)
	if ModelartsCD.Enabled {
		payload = []byte(ModelartsCD.ImageInfos)
	}
	json.Unmarshal(payload, &StImageInfos)
}

// getNotebookFlavorInfos fills StFlavorInfo once: when it is still nil, the
// JSON text is taken from ModelartsCD.FlavorInfos if ModelartsCD is enabled,
// otherwise from FlavorInfos. Decoding errors are ignored.
func getNotebookFlavorInfos() {
	if StFlavorInfo != nil {
		return
	}
	payload := []byte(FlavorInfos)
	if ModelartsCD.Enabled {
		payload = []byte(ModelartsCD.FlavorInfos)
	}
	json.Unmarshal(payload, &StFlavorInfo)
}

// NewServices initializes the services
func NewServices() {
InitDBConfig()


+ 5
- 0
options/locale/locale_en-US.ini View File

@@ -1079,6 +1079,7 @@ balance.total_view = Total Balance
balance.available = Available Balance:
cloudbrain1 = cloudbrain1
cloudbrain2 = cloudbrain2
cdCenter = cd_ai_center
cloudbrain_selection = select cloudbrain
cloudbrain_platform_selection = Select the cloudbrain platform you want to use:
confirm_choice = Confirm
@@ -1213,6 +1214,7 @@ modelarts.infer_job.select_model = Select Model
modelarts.infer_job.boot_file_helper=The startup file is the entry file for your program execution and must end in .py, such as inference.py, main.py, example/inference.py, case/main.py.
modelarts.infer_job.tooltip = The model has been deleted and cannot be viewed.
modelarts.download_log=Download log file
modelarts.no_node_right = The value of 'Amount of Compute Node' is wrong, you have no right to use the current value of 'Amount of Compute Node'.


debug_task_not_created = Debug task has not been created
@@ -3205,6 +3207,9 @@ gpu_num = GPU
cpu_num = CPU
memory = Memory
shared_memory = Shared Memory
gpu_memory = GPU Memory
free = Free
point_hr = Point/hr


DEBUG = DEBUG


+ 5
- 0
options/locale/locale_zh-CN.ini View File

@@ -1080,6 +1080,7 @@ balance.total_view=余额总览
balance.available=可用余额:
cloudbrain1=云脑1
cloudbrain2=云脑2
cdCenter=成都智算中心
intelligent_net=智算网络
cloudbrain_selection=云脑选择
cloudbrain_platform_selection=选择您准备使用的云脑平台:
@@ -1226,6 +1227,7 @@ modelarts.infer_job.select_model = 选择模型
modelarts.infer_job.boot_file_helper=启动文件是您程序执行的入口文件,必须是以.py结尾的文件。比如inference.py、main.py、example/inference.py、case/main.py。
modelarts.infer_job.tooltip = 该模型已删除,无法查看。
modelarts.download_log=下载日志文件
modelarts.no_node_right = 计算节点数的值配置错误,您没有权限使用当前配置的计算节点数。


debug_task_not_created = 未创建过调试任务
@@ -3224,6 +3226,9 @@ gpu_num = GPU数
cpu_num = CPU数
memory = 内存
shared_memory = 共享内存
gpu_memory = 显存
free = 免费
point_hr = 积分/时

DEBUG = 调试任务
SNN4IMAGENET = 评测任务


+ 2
- 2
routers/admin/cloudbrains.go View File

@@ -92,13 +92,13 @@ func CloudBrains(ctx *context.Context) {
return
}

models.LoadSpecs4CloudbrainInfo(ciTasks)

for i, task := range ciTasks {
ciTasks[i].CanDebug = true
ciTasks[i].CanDel = true
ciTasks[i].Cloudbrain.ComputeResource = task.ComputeResource
ciTasks[i].Cloudbrain.AiCenter = repo.GetCloudbrainAiCenter(task.Cloudbrain, ctx)
_, cardType, _ := repo.GetCloudbrainCardNumAndType(task.Cloudbrain)
ciTasks[i].Cloudbrain.CardType = cardType
ciTasks[i].Cloudbrain.Cluster = repo.GetCloudbrainCluster(task.Cloudbrain, ctx)
}



+ 42
- 4
routers/admin/resources.go View File

@@ -8,6 +8,8 @@ import (
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/cloudbrain/resource"
"net/http"
"strconv"
"strings"
)

const (
@@ -118,11 +120,13 @@ func GetResourceSpecificationList(ctx *context.Context) {
queue := ctx.QueryInt64("queue")
status := ctx.QueryInt("status")
cluster := ctx.Query("cluster")
available := ctx.QueryInt("available")
list, err := resource.GetResourceSpecificationList(models.SearchResourceSpecificationOptions{
ListOptions: models.ListOptions{Page: page, PageSize: 10},
QueueId: queue,
Status: status,
Cluster: cluster,
ListOptions: models.ListOptions{Page: page, PageSize: 10},
QueueId: queue,
Status: status,
Cluster: cluster,
AvailableCode: available,
})
if err != nil {
log.Error("GetResourceSpecificationList error.%v", err)
@@ -246,3 +250,37 @@ func UpdateResourceScene(ctx *context.Context, req models.ResourceSceneReq) {
}
ctx.JSON(http.StatusOK, response.Success())
}

// RefreshHistorySpec handles a request to refresh the specification of
// historical tasks. Query parameters:
//   - scope: the value "all" sets the scope-all flag passed to the service.
//   - list:  task ids separated by "|"; each must parse as a base-10 int64.
//
// The response is always sent with HTTP 200; failures are reported through
// response.ServerError, success carries the "success" and "total" counters.
func RefreshHistorySpec(ctx *context.Context) {
	scopeAll := ctx.Query("scope") == "all"
	rawList := ctx.Query("list")

	ids := make([]int64, 0)
	if rawList != "" {
		for _, part := range strings.Split(rawList, "|") {
			id, parseErr := strconv.ParseInt(part, 10, 64)
			if parseErr != nil {
				// A single malformed id aborts the whole request.
				ctx.JSON(http.StatusOK, response.ServerError(parseErr.Error()))
				return
			}
			ids = append(ids, id)
		}
	}

	total, success, err := resource.RefreshHistorySpec(scopeAll, ids)
	if err != nil {
		log.Error("RefreshHistorySpec error. %v", err)
		ctx.JSON(http.StatusOK, response.ServerError(err.Error()))
		return
	}

	ctx.JSON(http.StatusOK, response.SuccessWithData(map[string]interface{}{
		"success": success,
		"total":   total,
	}))
}

+ 139
- 30
routers/api/v1/repo/cloudbrain.go View File

@@ -405,46 +405,83 @@ func CloudbrainDownloadLogFile(ctx *context.Context) {

func CloudbrainGetLog(ctx *context.Context) {
ID := ctx.Params(":id")
startLine := ctx.QueryInt("base_line")
lines := ctx.QueryInt("lines")
endLine := startLine + lines
order := ctx.Query("order")
if order == "asc" {
endLine = startLine
startLine = endLine - lines
if startLine < 0 {
startLine = 0
}
}
job, err := models.GetCloudbrainByID(ID)
if err != nil {
log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
result := getLogFromModelDir(job.JobName, startLine, endLine)
if result == nil {
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
lines := ctx.QueryInt("lines")
baseLine := ctx.Query("base_line")
order := ctx.Query("order")
var result map[string]interface{}
resultPath := "/model"
if job.JobType == string(models.JobTypeInference) {
resultPath = "/result"
}
if baseLine == "" && order == "desc" {
result = getLastLogFromModelDir(job.JobName, lines, resultPath)
} else {
startLine := ctx.QueryInt("base_line")
endLine := startLine + lines
if order == "asc" {
if baseLine == "" {
startLine = 0
endLine = lines
} else {
endLine = startLine
startLine = endLine - lines
if startLine < 0 {
startLine = 0
}
}
}
result = getLogFromModelDir(job.JobName, startLine, endLine, resultPath)
if result == nil {
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
}

re := map[string]interface{}{
"JobID": ID,
"LogFileName": result["FileName"],
"StartLine": startLine,
"EndLine": result["endLine"],
"StartLine": result["StartLine"],
"EndLine": result["EndLine"],
"Content": result["Content"],
"Lines": result["lines"],
"Lines": result["Lines"],
"CanLogDownload": result["FileName"] != "",
}
//result := CloudbrainGetLogByJobId(job.JobID, job.JobName)

ctx.JSON(http.StatusOK, re)
}

func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + "/model"
// getAllLineFromFile returns the number of newline-terminated lines in the
// file at path.
//
// A trailing fragment that is not followed by '\n' is not counted, because
// ReadString reports io.EOF for it and the loop stops before incrementing.
// If the file cannot be opened, the error is logged and 0 is returned; if a
// read fails part-way, the error is logged and the lines counted so far are
// returned.
func getAllLineFromFile(path string) int {
	count := 0
	reader, err := os.Open(path)
	if err != nil {
		log.Info("error:" + err.Error())
		return count
	}
	// Close is registered only after Open succeeded, so it is never invoked
	// on a nil *os.File.
	defer reader.Close()

	r := bufio.NewReader(reader)
	for {
		_, readErr := r.ReadString('\n')
		if readErr == io.EOF {
			log.Info("read file completed.")
			break
		}
		if readErr != nil {
			log.Info("read file error." + readErr.Error())
			break
		}
		count++
	}
	return count
}

func getLastLogFromModelDir(jobName string, lines int, resultPath string) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
if err != nil {
log.Error("query cloudbrain model failed: %v", err)
@@ -454,11 +491,81 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
re := ""
fileName := ""
count := 0
allLines := 0
startLine := 0
for _, file := range files {
if strings.HasSuffix(file.FileName, "log.txt") {
fileName = file.FileName
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
allLines = getAllLineFromFile(path)
startLine = allLines - lines
if startLine < 0 {
startLine = 0
}
count = allLines - startLine
log.Info("path=" + path)
reader, err := os.Open(path)
defer reader.Close()
if err == nil {
r := bufio.NewReader(reader)
for i := 0; i < allLines; i++ {
line, error := r.ReadString('\n')
if error == io.EOF {
log.Info("read file completed.")
break
}
if error != nil {
log.Info("read file error." + error.Error())
break
}
if error == nil {
if i >= startLine {
re = re + line
}
}
}
} else {
log.Info("error:" + err.Error())
}
break
}
}

return map[string]interface{}{
"JobName": jobName,
"Content": re,
"FileName": fileName,
"Lines": count,
"EndLine": allLines,
"StartLine": startLine,
}
}

func getLogFromModelDir(jobName string, startLine int, endLine int, resultPath string) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
if err != nil {
log.Error("query cloudbrain model failed: %v", err)
return nil
}
if startLine == endLine {
return map[string]interface{}{
"JobName": jobName,
"Content": "",
"FileName": "",
"Lines": 0,
"EndLine": startLine,
"StartLine": startLine,
}
}
re := ""
fileName := ""
count := 0
fileEndLine := endLine
for _, file := range files {
if strings.HasSuffix(file.FileName, "log.txt") {
fileName = file.FileName
path := storage.GetMinioPath(jobName+"/model/", file.FileName)
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
log.Info("path=" + path)
reader, err := os.Open(path)
defer reader.Close()
@@ -467,7 +574,6 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
for i := 0; i < endLine; i++ {
line, error := r.ReadString('\n')
log.Info("line=" + line)
fileEndLine = i
if error == io.EOF {
log.Info("read file completed.")
break
@@ -478,11 +584,13 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
}
if error == nil {
if i >= startLine {
fileEndLine = i
re = re + line
count++
}
}
}
fileEndLine = fileEndLine + 1
} else {
log.Info("error:" + err.Error())
}
@@ -491,11 +599,12 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
}

return map[string]interface{}{
"JobName": jobName,
"Content": re,
"FileName": fileName,
"lines": count,
"endLine": fileEndLine,
"JobName": jobName,
"Content": re,
"FileName": fileName,
"Lines": count,
"EndLine": fileEndLine,
"StartLine": startLine,
}
}



+ 2
- 0
routers/private/internal.go View File

@@ -6,6 +6,7 @@
package private

import (
"code.gitea.io/gitea/routers/admin"
"strings"

"code.gitea.io/gitea/routers/repo"
@@ -51,6 +52,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Get("/tool/org_stat", OrgStatisticManually)
m.Post("/tool/update_repo_visit/:date", UpdateRepoVisit)
m.Post("/task/history_handle/duration", repo.HandleTaskWithNoDuration)
m.Post("/resources/specification/handle_historical_task", admin.RefreshHistorySpec)

}, CheckInternalToken)
}

+ 187
- 273
routers/repo/cloudbrain.go View File

@@ -2,6 +2,7 @@ package repo

import (
"bufio"
"code.gitea.io/gitea/services/cloudbrain/resource"
"encoding/json"
"errors"
"fmt"
@@ -121,86 +122,7 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
ctx.Data["QueuesDetail"] = queuesDetail
}

cloudbrain.InitSpecialPool()

if gpuInfos == nil {
json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos)
}
ctx.Data["gpu_types"] = gpuInfos.GpuInfo

if trainGpuInfos == nil {
json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos)
}
ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo

if inferenceGpuInfos == nil && setting.InferenceGpuTypes != "" {
json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos)
}
if inferenceGpuInfos != nil {
ctx.Data["inference_gpu_types"] = inferenceGpuInfos.GpuInfo
}

if benchmarkGpuInfos == nil {
json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos)
}
ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo

if benchmarkResourceSpecs == nil {
json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &benchmarkResourceSpecs)
}
ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec

if cloudbrain.ResourceSpecs == nil {
json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs)
}
ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec

if cloudbrain.TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
}
ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec

if cloudbrain.InferenceResourceSpecs == nil && setting.InferenceResourceSpecs != "" {
json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs)
}
if cloudbrain.InferenceResourceSpecs != nil {
ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec
}

if cloudbrain.SpecialPools != nil {
var debugGpuTypes []*models.GpuInfo
var trainGpuTypes []*models.GpuInfo

for _, pool := range cloudbrain.SpecialPools.Pools {
isOrgMember, _ := models.IsOrganizationMemberByOrgName(pool.Org, ctx.User.ID)
if isOrgMember {
for _, jobType := range pool.JobType {
if jobType == string(models.JobTypeDebug) {
debugGpuTypes = append(debugGpuTypes, pool.Pool...)
if pool.ResourceSpec != nil {
ctx.Data["resource_specs"] = pool.ResourceSpec
}
} else if jobType == string(models.JobTypeTrain) {
trainGpuTypes = append(trainGpuTypes, pool.Pool...)
if pool.ResourceSpec != nil {
ctx.Data["train_resource_specs"] = pool.ResourceSpec
}
}
}
break
}

}

if len(debugGpuTypes) > 0 {
ctx.Data["gpu_types"] = debugGpuTypes
}

if len(trainGpuTypes) > 0 {
ctx.Data["train_gpu_types"] = trainGpuTypes
}

}
prepareCloudbrainOneSpecs(ctx)

ctx.Data["params"] = ""
ctx.Data["branchName"] = ctx.Repo.BranchName
@@ -218,6 +140,40 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
return nil
}

// prepareCloudbrainOneSpecs looks up the specifications available to the
// current user for each job type (debug, train, inference, benchmark) on the
// GPU / OpenI cluster / CloudBrain-One AI center combination and stores each
// result in ctx.Data under the matching "*_specs" key. Lookup errors are
// ignored; whatever FindAvailableSpecs returned is stored as-is.
func prepareCloudbrainOneSpecs(ctx *context.Context) {
	targets := []struct {
		dataKey string
		jobType models.JobType
	}{
		{"debug_specs", models.JobTypeDebug},
		{"train_specs", models.JobTypeTrain},
		{"inference_specs", models.JobTypeInference},
		{"benchmark_specs", models.JobTypeBenchmark},
	}

	for _, target := range targets {
		specs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
			JobType:         target.jobType,
			ComputeResource: models.GPU,
			Cluster:         models.OpenICluster,
			AiCenterCode:    models.AICenterOfCloudBrainOne,
		})
		ctx.Data[target.dataKey] = specs
	}
}

func CloudBrainNew(ctx *context.Context) {
err := cloudBrainNewDataPrepare(ctx)
if err != nil {
@@ -235,9 +191,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
image := strings.TrimSpace(form.Image)
uuids := form.Attachment
jobType := form.JobType
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := form.BranchName
bootFile := strings.TrimSpace(form.BootFile)
repo := ctx.Repo.Repository
@@ -325,18 +279,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
command = commandTrain
}

errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId)

if errStr != "" {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(errStr, tpl, &form)
return
}

if branchName == "" {
branchName = cloudbrain.DefaultBranchName
}
errStr = loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath)
errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath)
if errStr != "" {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form)
@@ -345,6 +291,17 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {

commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)

spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobType(jobType),
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("Resource specification not available", tpl, &form)
return
}

req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -360,7 +317,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description,
BranchName: branchName,
BootFile: form.BootFile,
@@ -368,8 +324,8 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
CommitID: commitID,
BenchmarkTypeID: 0,
BenchmarkChildTypeID: 0,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec,
}

err = cloudbrain.GenerateTask(req)
@@ -417,9 +373,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
image := strings.TrimSpace(form.Image)
uuid := form.Attachment
jobType := string(models.JobTypeInference)
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := form.BranchName
bootFile := strings.TrimSpace(form.BootFile)
labelName := form.LabelName
@@ -501,7 +455,16 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return
}

spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeInference,
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("Resource specification not available", tpl, &form)
return
}
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -517,19 +480,18 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description,
BranchName: branchName,
BootFile: form.BootFile,
Params: form.Params,
CommitID: commitID,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
ModelName: form.ModelName,
ModelVersion: form.ModelVersion,
CkptName: form.CkptName,
TrainUrl: form.TrainUrl,
LabelName: labelName,
Spec: spec,
}

err = cloudbrain.GenerateTask(req)
@@ -607,34 +569,25 @@ func CloudBrainRestart(ctx *context.Context) {
break
}

var hasSameResource bool
if gpuInfos == nil {
json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos)
}
for _, resourceType := range gpuInfos.GpuInfo {
if resourceType.Queue == task.GpuQueue {
hasSameResource = true
break
}
}
if !hasSameResource && cloudbrain.SpecialPools != nil {

for _, specialPool := range cloudbrain.SpecialPools.Pools {
cloudbrain.IsElementExist(specialPool.JobType, string(models.JobTypeDebug))
for _, pool := range specialPool.Pool {
if pool.Queue == task.GpuQueue {
hasSameResource = true
}
}
}
specOld, err := resource.GetCloudbrainSpec(task.ID)
if err != nil || specOld == nil {
log.Error("CloudBrainRestart GetCloudbrainSpec error.task.id = %d", task.ID)
resultCode = "-1"
errorMsg = "Resource specification not support any more"
break
}

if !hasSameResource {
log.Error("has no same resource, can not restart", ctx.Data["MsgID"])
spec, err := resource.GetAndCheckSpec(ctx.User.ID, specOld.ID, models.FindSpecsOptions{
JobType: models.JobType(task.JobType),
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
log.Error("CloudBrainRestart GetAndCheckSpec error.task.id = %d", task.ID)
resultCode = "-1"
errorMsg = "the job's version is too old and can not be restarted"
errorMsg = "Resource specification not support any more"
break
}
task.Spec = spec

count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, string(models.JobTypeDebug))
if err != nil {
@@ -707,128 +660,13 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}
hasSpec := false
if task.JobType == string(models.JobTypeTrain) {
if cloudbrain.TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
}

for _, tmp := range cloudbrain.TrainResourceSpecs.ResourceSpec {
if tmp.Id == task.ResourceSpecId {
hasSpec = true
ctx.Data["GpuNum"] = tmp.GpuNum
ctx.Data["CpuNum"] = tmp.CpuNum
ctx.Data["MemMiB"] = tmp.MemMiB
ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB
break
}
}

} else if task.JobType == string(models.JobTypeInference) {
if cloudbrain.InferenceResourceSpecs == nil {
json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs)
}
for _, tmp := range cloudbrain.InferenceResourceSpecs.ResourceSpec {
if tmp.Id == task.ResourceSpecId {
hasSpec = true
ctx.Data["GpuNum"] = tmp.GpuNum
ctx.Data["CpuNum"] = tmp.CpuNum
ctx.Data["MemMiB"] = tmp.MemMiB
ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB
break
}
}
} else {
if cloudbrain.ResourceSpecs == nil {
json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs)
}
for _, tmp := range cloudbrain.ResourceSpecs.ResourceSpec {
if tmp.Id == task.ResourceSpecId {
hasSpec = true
ctx.Data["GpuNum"] = tmp.GpuNum
ctx.Data["CpuNum"] = tmp.CpuNum
ctx.Data["MemMiB"] = tmp.MemMiB
ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB
break

}
}
}

if !hasSpec && cloudbrain.SpecialPools != nil {

for _, specialPool := range cloudbrain.SpecialPools.Pools {

if specialPool.ResourceSpec != nil {

for _, spec := range specialPool.ResourceSpec {
if task.ResourceSpecId == spec.Id {
ctx.Data["GpuNum"] = spec.GpuNum
ctx.Data["CpuNum"] = spec.CpuNum
ctx.Data["MemMiB"] = spec.MemMiB
ctx.Data["ShareMemMiB"] = spec.ShareMemMiB
break
}
}
}
}
prepareSpec4Show(ctx, task)
if ctx.Written() {
return
}

if result != nil {
jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
jobRes.Resource.Memory = strings.ReplaceAll(jobRes.Resource.Memory, "Mi", "MB")
spec := "GPU数:" + strconv.Itoa(jobRes.Resource.NvidiaComGpu) + ",CPU数:" + strconv.Itoa(jobRes.Resource.CPU) + ",内存(MB):" + jobRes.Resource.Memory
ctx.Data["resource_spec"] = spec
if task.JobType == string(models.JobTypeTrain) {
if trainGpuInfos == nil {
json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos)
}
for _, resourceType := range trainGpuInfos.GpuInfo {
if resourceType.Queue == jobRes.Config.GpuType {
ctx.Data["resource_type"] = resourceType.Value
}
}

} else if task.JobType == string(models.JobTypeInference) {
if inferenceGpuInfos == nil {
json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos)
}
for _, resourceType := range inferenceGpuInfos.GpuInfo {
if resourceType.Queue == jobRes.Config.GpuType {
ctx.Data["resource_type"] = resourceType.Value
}
}
} else if cloudbrain.IsBenchmarkJob(task.JobType) {
if benchmarkGpuInfos == nil {
json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos)
}

for _, resourceType := range benchmarkGpuInfos.GpuInfo {
if resourceType.Queue == jobRes.Config.GpuType {
ctx.Data["resource_type"] = resourceType.Value
}
}

} else {
if gpuInfos == nil {
json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos)
}
for _, resourceType := range gpuInfos.GpuInfo {
if resourceType.Queue == jobRes.Config.GpuType {
ctx.Data["resource_type"] = resourceType.Value
}
}
}

if cloudbrain.SpecialPools != nil {
for _, specialPool := range cloudbrain.SpecialPools.Pools {
for _, resourceType := range specialPool.Pool {
if resourceType.Queue == jobRes.Config.GpuType {
ctx.Data["resource_type"] = resourceType.Value
}
}
}
}
taskRoles := jobRes.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
ctx.Data["taskRes"] = taskRes
@@ -952,6 +790,85 @@ func CloudBrainDebug(ctx *context.Context) {
ctx.Redirect(debugUrl)
}

// prepareSpec4Show loads the specification recorded for task and exposes it
// to the template as ctx.Data["Spec"]. When the lookup fails, the error is
// logged and a not-found response is written, so callers should check
// ctx.Written() afterwards.
func prepareSpec4Show(ctx *context.Context, task *models.Cloudbrain) {
	spec, lookupErr := resource.GetCloudbrainSpec(task.ID)
	if lookupErr == nil {
		ctx.Data["Spec"] = spec
		return
	}
	log.Info("error:" + lookupErr.Error())
	ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
}

// oldPrepareSpec4Show resolves the resource specification of task from the
// settings-based spec lists and publishes its GPU/CPU/memory figures in
// ctx.Data ("GpuNum", "CpuNum", "MemMiB", "ShareMemMiB").
//
// The list consulted depends on task.JobType: the train list, the inference
// list, or the general list for every other type. Each list is lazily decoded
// from its setting on first use. When no entry matches and special pools are
// configured, the special pools' spec lists are searched as a fallback.
func oldPrepareSpec4Show(ctx *context.Context, task *models.Cloudbrain) {
	matched := false

	switch task.JobType {
	case string(models.JobTypeTrain):
		if cloudbrain.TrainResourceSpecs == nil {
			json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
		}
		for _, candidate := range cloudbrain.TrainResourceSpecs.ResourceSpec {
			if candidate.Id != task.ResourceSpecId {
				continue
			}
			matched = true
			ctx.Data["GpuNum"] = candidate.GpuNum
			ctx.Data["CpuNum"] = candidate.CpuNum
			ctx.Data["MemMiB"] = candidate.MemMiB
			ctx.Data["ShareMemMiB"] = candidate.ShareMemMiB
			break
		}

	case string(models.JobTypeInference):
		if cloudbrain.InferenceResourceSpecs == nil {
			json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs)
		}
		for _, candidate := range cloudbrain.InferenceResourceSpecs.ResourceSpec {
			if candidate.Id != task.ResourceSpecId {
				continue
			}
			matched = true
			ctx.Data["GpuNum"] = candidate.GpuNum
			ctx.Data["CpuNum"] = candidate.CpuNum
			ctx.Data["MemMiB"] = candidate.MemMiB
			ctx.Data["ShareMemMiB"] = candidate.ShareMemMiB
			break
		}

	default:
		if cloudbrain.ResourceSpecs == nil {
			json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs)
		}
		for _, candidate := range cloudbrain.ResourceSpecs.ResourceSpec {
			if candidate.Id != task.ResourceSpecId {
				continue
			}
			matched = true
			ctx.Data["GpuNum"] = candidate.GpuNum
			ctx.Data["CpuNum"] = candidate.CpuNum
			ctx.Data["MemMiB"] = candidate.MemMiB
			ctx.Data["ShareMemMiB"] = candidate.ShareMemMiB
			break
		}
	}

	// Fallback: only consult the special pools when nothing matched above.
	if matched || cloudbrain.SpecialPools == nil {
		return
	}
	for _, pool := range cloudbrain.SpecialPools.Pools {
		if pool.ResourceSpec == nil {
			continue
		}
		for _, poolSpec := range pool.ResourceSpec {
			if task.ResourceSpecId != poolSpec.Id {
				continue
			}
			ctx.Data["GpuNum"] = poolSpec.GpuNum
			ctx.Data["CpuNum"] = poolSpec.CpuNum
			ctx.Data["MemMiB"] = poolSpec.MemMiB
			ctx.Data["ShareMemMiB"] = poolSpec.ShareMemMiB
			// Stops scanning this pool only; the remaining pools are still visited.
			break
		}
	}
}

func CloudBrainCommitImageShow(ctx *context.Context) {
ctx.Data["PageIsCloudBrain"] = true
ctx.Data["Type"] = ctx.Cloudbrain.Type
@@ -2285,10 +2202,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
displayJobName := form.DisplayJobName
jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
image := strings.TrimSpace(form.Image)
gpuQueue := form.GpuType
command := cloudbrain.CommandBenchmark
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := cloudbrain.BenchMarkResourceID
benchmarkTypeID := form.BenchmarkTypeID
benchmarkChildTypeID := form.BenchmarkChildTypeID

@@ -2329,19 +2244,14 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
return
}

_, err = getBenchmarkGpuQueue(gpuQueue)
if err != nil {
log.Error("getBenchmarkGpuQueue failed:%v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("gpu queue error", tplCloudBrainBenchmarkNew, &form)
return
}

_, err = getBenchmarkResourceSpec(resourceSpecId)
if err != nil {
log.Error("getBenchmarkResourceSpec failed:%v", err, ctx.Data["MsgID"])
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeBenchmark,
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("resource spec error", tplCloudBrainBenchmarkNew, &form)
ctx.RenderWithErr("Resource specification not available", tplCloudBrainBenchmarkNew, &form)
return
}

@@ -2402,14 +2312,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
}

benchmarkPath := setting.JobPath + jobName + cloudbrain.BenchMarkMountPath
var gpuType string
for _, gpuInfo := range gpuInfos.GpuInfo {
if gpuInfo.Queue == gpuQueue {
gpuType = gpuInfo.Value
}
}

if err := downloadRateCode(repo, jobName, childInfo.Owner, childInfo.RepoName, benchmarkPath, form.BenchmarkCategory, gpuType, ctx.User.Name); err != nil {
if err := downloadRateCode(repo, jobName, childInfo.Owner, childInfo.RepoName, benchmarkPath, form.BenchmarkCategory, spec.AccCardType, ctx.User.Name); err != nil {
log.Error("downloadRateCode failed, %v", err, ctx.Data["MsgID"])
//cloudBrainNewDataPrepare(ctx)
//ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form)
@@ -2431,7 +2335,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, &form)
return
}

log.Info("Command=" + command)
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"))
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -2447,7 +2352,6 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: string(models.JobTypeBenchmark),
GpuQueue: gpuQueue,
Description: form.Description,
BranchName: cloudbrain.DefaultBranchName,
BootFile: "",
@@ -2455,8 +2359,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
CommitID: "",
BenchmarkTypeID: benchmarkTypeID,
BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec,
}

err = cloudbrain.GenerateTask(req)
@@ -2476,9 +2380,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
image := form.Image
uuid := form.Attachment
jobType := form.JobType
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := cloudbrain.DefaultBranchName
repo := ctx.Repo.Repository

@@ -2560,7 +2462,18 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return
}

spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeBenchmark,
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("Resource specification not available", tpl, &form)
return
}
log.Info("Command=" + command)
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"))
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -2576,7 +2489,6 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description,
BranchName: branchName,
BootFile: form.BootFile,
@@ -2584,8 +2496,8 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
CommitID: "",
BenchmarkTypeID: 0,
BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec,
}

err = cloudbrain.GenerateTask(req)
@@ -2718,7 +2630,7 @@ func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) {
}
}

command += "python /code/" + bootFile + param + " | tee " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile
command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile

return command, nil
}
@@ -2763,6 +2675,8 @@ func GetCloudbrainAiCenter(task models.Cloudbrain, ctx *context.Context) string
return ctx.Tr("repo.cloudbrain1")
} else if task.Type == models.TypeCloudBrainTwo {
return ctx.Tr("repo.cloudbrain2")
} else if task.Type == models.TypeCDCenter {
return ctx.Tr("repo.cdCenter")
} else if task.Type == models.TypeC2Net {
return getCutStringAiCenterByAiCenter(task.AiCenter)
}
@@ -2777,7 +2691,7 @@ func getCutStringAiCenterByAiCenter(aiCenter string) string {

}
func GetCloudbrainCluster(task models.Cloudbrain, ctx *context.Context) string {
if task.Type == models.TypeCloudBrainOne || task.Type == models.TypeCloudBrainTwo {
if task.Type == models.TypeCloudBrainOne || task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter {
return ctx.Tr("cloudbrain.resource_cluster_openi")
} else if task.Type == models.TypeC2Net {
return ctx.Tr("cloudbrain.resource_cluster_c2net")
@@ -2864,10 +2778,10 @@ func GetCloudbrainFlavorName(task models.Cloudbrain) (string, error) {
return CloudbrainOneFlavorName, nil
}
}
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeC2Net) && task.FlavorName != "" {
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeC2Net || task.Type == models.TypeCDCenter) && task.FlavorName != "" {
replaceFlavorName := strings.ReplaceAll(task.FlavorName, ":", ":")
return replaceFlavorName, nil
} else if task.Type == models.TypeCloudBrainTwo && task.FlavorName == "" && task.FlavorCode != "" {
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter) && task.FlavorName == "" && task.FlavorCode != "" {
cloudbrainTwoFlavorName := getFlavorNameByFlavorCode(task.FlavorCode)
return cloudbrainTwoFlavorName, nil
} else if task.Type == models.TypeCloudBrainTwo && task.JobType == string(models.JobTypeDebug) && task.FlavorName == "" && task.FlavorCode == "" {


+ 2
- 7
routers/repo/dataset.go View File

@@ -45,15 +45,10 @@ func newFilterPrivateAttachments(ctx *context.Context, list []*models.Attachment
repo.GetOwner()
}
permission := false
if repo.Owner.IsOrganization() && ctx.User != nil {
if repo.Owner.IsUserPartOfOrg(ctx.User.ID) {
log.Info("user is member of org.")
permission = true
}
}
if !permission && ctx.User != nil {
isCollaborator, _ := repo.IsCollaborator(ctx.User.ID)
if isCollaborator {
isInRepoTeam,_:=repo.IsInRepoTeam(ctx.User.ID)
if isCollaborator ||isInRepoTeam {
log.Info("Collaborator user may visit the attach.")
permission = true
}


+ 42
- 15
routers/repo/grampus.go View File

@@ -1,6 +1,7 @@
package repo

import (
"code.gitea.io/gitea/services/cloudbrain/resource"
"encoding/json"
"errors"
"fmt"
@@ -106,15 +107,11 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err
}
}

//get valid resource specs
specs, err := grampus.GetResourceSpecs(processType)

grampusSpecs := getFilterSpecBySpecialPool(specs, includeCenters, excludeCenters)

if err != nil {
log.Error("GetResourceSpecs failed:", err.Error())
} else {
ctx.Data["flavor_infos"] = grampusSpecs
//prepare available specs
if processType == grampus.ProcessorTypeNPU {
prepareGrampusTrainSpecs(ctx, models.NPU)
} else if processType == grampus.ProcessorTypeGPU {
prepareGrampusTrainSpecs(ctx, models.GPU)
}

//get branches
@@ -140,6 +137,15 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err
return nil
}

// prepareGrampusTrainSpecs looks up the train-job specifications available to
// the current user on the C2Net cluster for the given compute resource and
// stores the result in ctx.Data["Specs"].
//
// The error returned by resource.FindAvailableSpecs is discarded, so whatever
// value the lookup returned is stored unchanged.
func prepareGrampusTrainSpecs(ctx *context.Context, computeResource string) {
	opts := models.FindSpecsOptions{
		JobType:         models.JobTypeTrain,
		ComputeResource: computeResource,
		Cluster:         models.C2NetCluster,
	}
	specs, _ := resource.FindAvailableSpecs(ctx.User.ID, opts)
	ctx.Data["Specs"] = specs
}

func getFilterSpecBySpecialPool(specs *models.GetGrampusResourceSpecsResult, includeCenters map[string]struct{}, excludeCenters map[string]struct{}) []models.GrampusSpec {
if len(includeCenters) == 0 && len(excludeCenters) == 0 {
return specs.Infos
@@ -206,7 +212,6 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/"
dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid
branchName := form.BranchName
flavorName := form.FlavorName
image := strings.TrimSpace(form.Image)

if !jobNamePattern.MatchString(displayJobName) {
@@ -272,6 +277,18 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
}
}

//check specification
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeTrain,
ComputeResource: models.GPU,
Cluster: models.C2NetCluster,
})
if err != nil || spec == nil {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobGPUNew, &form)
return
}

//check dataset
attachment, err := models.GetAttachmentByUUID(uuid)
if err != nil {
@@ -336,7 +353,6 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
ComputeResource: models.GPUResource,
ProcessType: grampus.ProcessorTypeGPU,
Command: command,
ResourceSpecId: form.FlavorID,
ImageUrl: image,
Description: description,
BootFile: bootFile,
@@ -344,12 +360,12 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
CommitID: commitID,
BranchName: branchName,
Params: form.Params,
FlavorName: flavorName,
EngineName: image,
DatasetName: attachment.Name,
IsLatestVersion: modelarts.IsLatestVersion,
VersionCount: modelarts.VersionCountOne,
WorkServerNumber: 1,
Spec: spec,
}

err = grampus.GenerateTrainJob(ctx, req)
@@ -397,7 +413,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
branchName := form.BranchName
isLatestVersion := modelarts.IsLatestVersion
flavorName := form.FlavorName
versionCount := modelarts.VersionCountOne
engineName := form.EngineName

@@ -464,6 +479,18 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
}
}

//check specification
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeTrain,
ComputeResource: models.NPU,
Cluster: models.C2NetCluster,
})
if err != nil || spec == nil {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobNPUNew, &form)
return
}

//check dataset
attachment, err := models.GetAttachmentByUUID(uuid)
if err != nil {
@@ -518,7 +545,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
ComputeResource: models.NPUResource,
ProcessType: grampus.ProcessorTypeNPU,
Command: command,
ResourceSpecId: form.FlavorID,
ImageId: form.ImageID,
DataUrl: dataObsPath,
Description: description,
@@ -531,11 +557,11 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
IsLatestVersion: isLatestVersion,
BranchName: branchName,
Params: form.Params,
FlavorName: flavorName,
EngineName: engineName,
VersionCount: versionCount,
TotalVersionCount: modelarts.TotalVersionCount,
DatasetName: attachment.Name,
Spec: spec,
}

err = grampus.GenerateTrainJob(ctx, req)
@@ -712,6 +738,7 @@ func GrampusTrainJobShow(ctx *context.Context) {

taskList := make([]*models.Cloudbrain, 0)
taskList = append(taskList, task)
prepareSpec4Show(ctx, task)
ctx.Data["version_list_task"] = taskList
ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false)
ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task)


+ 241
- 151
routers/repo/modelarts.go View File

@@ -2,6 +2,8 @@ package repo

import (
"archive/zip"
"code.gitea.io/gitea/modules/modelarts_cd"
"code.gitea.io/gitea/services/cloudbrain/resource"
"encoding/json"
"errors"
"fmt"
@@ -60,18 +62,11 @@ func DebugJobIndex(ctx *context.Context) {
if page <= 0 {
page = 1
}
typeCloudBrain := models.TypeCloudBrainAll
jobTypeNot := false
if listType == models.GPUResource {
typeCloudBrain = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
typeCloudBrain = models.TypeCloudBrainTwo
} else if listType == models.AllResource {
typeCloudBrain = models.TypeCloudBrainAll
} else {
log.Error("listType(%s) error", listType)
ctx.ServerError("listType error", errors.New("listType error"))
return
var computeResource string
if listType != models.AllResource {
computeResource = listType
}

var jobTypes []string
@@ -81,10 +76,11 @@ func DebugJobIndex(ctx *context.Context) {
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: typeCloudBrain,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
RepoID: repo.ID,
ComputeResource: computeResource,
Type: models.TypeCloudBrainAll,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
})
if err != nil {
ctx.ServerError("Get debugjob faild:", err)
@@ -134,17 +130,9 @@ func notebookNewDataPrepare(ctx *context.Context) error {
return err
}
ctx.Data["attachments"] = attachs
ctx.Data["images"] = setting.StImageInfos.ImageInfo

if modelarts.ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &modelarts.ImageInfos)
}
ctx.Data["images"] = modelarts.ImageInfos.ImageInfo

if modelarts.FlavorInfos == nil {
json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
}
ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeDebug))
prepareCloudbrainTwoDebugSpecs(ctx)

ctx.Data["datasetType"] = models.TypeCloudBrainTwo

@@ -154,6 +142,20 @@ func notebookNewDataPrepare(ctx *context.Context) error {
return nil
}

// prepareCloudbrainTwoDebugSpecs stores in ctx.Data["Specs"] the NPU debug-job
// specifications available to the current user on the OpenI cluster.
//
// The AI center defaults to models.AICenterOfCloudBrainTwo and switches to
// models.AICenterOfChengdu when setting.ModelartsCD.Enabled is set.
//
// A lookup failure is logged instead of being silently dropped; the value
// returned by the lookup is still stored, so ctx.Data is populated exactly as
// before.
func prepareCloudbrainTwoDebugSpecs(ctx *context.Context) {
	aiCenterCode := models.AICenterOfCloudBrainTwo
	if setting.ModelartsCD.Enabled {
		aiCenterCode = models.AICenterOfChengdu
	}
	specs, err := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
		JobType:         models.JobTypeDebug,
		ComputeResource: models.NPU,
		Cluster:         models.OpenICluster,
		AiCenterCode:    aiCenterCode,
	})
	if err != nil {
		log.Error("prepareCloudbrainTwoDebugSpecs FindAvailableSpecs failed: %v", err)
	}
	ctx.Data["Specs"] = specs
}

func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
ctx.Data["PageIsNotebook"] = true
jobName := form.JobName
@@ -204,7 +206,6 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
uuid := form.Attachment
description := form.Description
flavor := form.Flavor
imageId := form.ImageId
repo := ctx.Repo.Repository

@@ -239,15 +240,26 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
return
}
}

errStr := checkModelArtsSpecialPool(ctx, flavor, string(models.JobTypeDebug))
if errStr != "" {
var aiCenterCode = models.AICenterOfCloudBrainTwo
if setting.ModelartsCD.Enabled {
aiCenterCode = models.AICenterOfChengdu
}
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeDebug,
ComputeResource: models.NPU,
Cluster: models.OpenICluster,
AiCenterCode: aiCenterCode})
if err != nil || spec == nil {
notebookNewDataPrepare(ctx)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsNotebookNew, &form)
ctx.RenderWithErr("Resource specification not available", tplModelArtsNotebookNew, &form)
return
}
if setting.ModelartsCD.Enabled {
err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, uuid, description, imageId, spec)
} else {
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec)
}

err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
if err != nil {
log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
@@ -292,24 +304,7 @@ func NotebookShow(ctx *context.Context) {
if err == nil {
task.User = user
}
if modelarts.FlavorInfos == nil {
json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
}

findSpec := false
if modelarts.FlavorInfos != nil {
ctx.Data["resource_spec"] = modelarts.FlavorInfos.FlavorInfo[0].Desc
for _, f := range modelarts.FlavorInfos.FlavorInfo {
if fmt.Sprint(f.Value) == task.FlavorCode {
ctx.Data["resource_spec"] = f.Desc
findSpec = true
break
}
}
}

setShowSpecBySpecialPoolConfig(ctx, findSpec, task)

prepareSpec4Show(ctx, task)
if task.TrainJobDuration == "" {
if task.Duration == 0 {
var duration int64
@@ -394,36 +389,16 @@ func setShowSpecBySpecialPoolConfig(ctx *context.Context, findSpec bool, task *m
}
}

// NotebookDebug redirects the client to the debug URL of the ModelArts job
// identified by the ":jobid" route parameter.
//
// It fetches the job via modelarts.GetJob and a token via
// modelarts.GetJobToken; if either call fails, the notebook index template is
// rendered with the error message and no redirect is issued.
func NotebookDebug(ctx *context.Context) {
var jobID = ctx.Params(":jobid")

result, err := modelarts.GetJob(jobID)
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
}

res, err := modelarts.GetJobToken(jobID)
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
}

// Rebuild the URL on top of TargetDomain: the first three "/"-separated
// segments of the annotated URL are dropped and the rest are appended.
// NOTE(review): presumably those three segments are the scheme, an empty
// string and the original host — confirm against the ModelArts response.
urls := strings.Split(result.Spec.Annotations.Url, "/")
urlPrefix := result.Spec.Annotations.TargetDomain
for i, url := range urls {
if i > 2 {
urlPrefix += "/" + url
}
}

// The token is passed to the notebook as a query parameter.
debugUrl := urlPrefix + "?token=" + res.Token
ctx.Redirect(debugUrl)
}

func NotebookDebug2(ctx *context.Context) {
var err error
var result *models.GetNotebook2Result
task := ctx.Cloudbrain
result, err := modelarts.GetNotebook2(task.JobID)
if task.Type == models.TypeCloudBrainTwo {
result, err = modelarts.GetNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
result, err = modelarts_cd.GetNotebook(task.JobID)
}

if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
@@ -437,6 +412,7 @@ func NotebookRestart(ctx *context.Context) {
var resultCode = "-1"
var errorMsg = ""
var status = ""
var spec *models.Specification

task := ctx.Cloudbrain

@@ -464,12 +440,40 @@ func NotebookRestart(ctx *context.Context) {
}
}

oldSpec, err := resource.GetCloudbrainSpec(task.ID)
if err != nil || oldSpec == nil {
log.Error("NotebookManage GetCloudbrainSpec error.%v", err)
errorMsg = "Resource specification not available"
break
}

aiCenterCode := models.AICenterOfCloudBrainTwo
if task.Type == models.TypeCDCenter {
aiCenterCode = models.AICenterOfChengdu
}
spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{
JobType: models.JobType(task.JobType),
ComputeResource: models.NPU,
Cluster: models.OpenICluster,
AiCenterCode: aiCenterCode})
if err != nil || spec == nil {
log.Error("NotebookManage GetAndCheckSpec error.task.id = %d", task.ID)
errorMsg = "Resource specification not support any more"
break
}

createTime := timeutil.TimeStampNow()
param := models.NotebookAction{
Action: models.ActionStart,
}

res, err := modelarts.ManageNotebook2(task.JobID, param)
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}

if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"])
/* 暂不处理再次调试502的场景,详情见方案
@@ -507,8 +511,7 @@ func NotebookRestart(ctx *context.Context) {
Description: task.Description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
FlavorCode: task.FlavorCode,
FlavorName: task.FlavorName,
Spec: spec,
}

err = models.RestartCloudbrain(task, newTask)
@@ -555,7 +558,14 @@ func NotebookStop(ctx *context.Context) {
Action: models.ActionStop,
}

res, err := modelarts.ManageNotebook2(task.JobID, param)
var err error
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}

if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
@@ -605,7 +615,13 @@ func NotebookDel(ctx *context.Context) {
return
}

_, err := modelarts.DelNotebook2(task.JobID)
var err error
if task.Type == models.TypeCloudBrainTwo {
_, err = modelarts.DelNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
_, err = modelarts_cd.DelNotebook(task.JobID)
}

if err != nil {
log.Error("DelNotebook2(%s) failed:%v", task.JobName, err.Error())
if strings.Contains(err.Error(), modelarts.NotebookNotFound) || strings.Contains(err.Error(), modelarts.NotebookNoPermission) || strings.Contains(err.Error(), modelarts.NotebookInvalid) {
@@ -741,14 +757,7 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
}
ctx.Data["engine_versions"] = versionInfos.Version

var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["flavor_infos"] = flavorInfos.Info

setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
prepareCloudbrainTwoTrainSpecs(ctx)

ctx.Data["params"] = ""
ctx.Data["branchName"] = ctx.Repo.BranchName
@@ -763,9 +772,33 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
ctx.Data["WaitCount"] = waitCount

setMultiNodeIfConfigureMatch(ctx)

return nil
}

// prepareCloudbrainTwoTrainSpecs stores in ctx.Data["Specs"] the NPU train-job
// specifications available to the current user on the OpenI cluster in the
// models.AICenterOfCloudBrainTwo AI center.
//
// A lookup failure is logged instead of being silently dropped; the value
// returned by the lookup is still stored, so ctx.Data is populated exactly as
// before.
func prepareCloudbrainTwoTrainSpecs(ctx *context.Context) {
	specs, err := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
		JobType:         models.JobTypeTrain,
		ComputeResource: models.NPU,
		Cluster:         models.OpenICluster,
		AiCenterCode:    models.AICenterOfCloudBrainTwo,
	})
	if err != nil {
		log.Error("prepareCloudbrainTwoTrainSpecs FindAvailableSpecs failed: %v", err)
	}
	ctx.Data["Specs"] = specs
}

// setMultiNodeIfConfigureMatch sets ctx.Data["WorkNode"] to the Node value of
// the first multi-node configuration entry whose organization the current
// user is a member of. Nothing is set when no configuration is loaded or no
// entry matches.
//
// The error from models.IsOrganizationMemberByOrgName is discarded; only the
// boolean result decides whether an entry matches.
func setMultiNodeIfConfigureMatch(ctx *context.Context) {
	modelarts.InitMultiNode()
	if modelarts.MultiNodeConfig == nil {
		return
	}
	for _, entry := range modelarts.MultiNodeConfig.Info {
		member, _ := models.IsOrganizationMemberByOrgName(entry.Org, ctx.User.ID)
		if !member {
			continue
		}
		// Only the first matching entry is used.
		ctx.Data["WorkNode"] = entry.Node
		return
	}
}

func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) {
modelarts.InitSpecialPool()

@@ -848,13 +881,7 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts
}
ctx.Data["engine_versions"] = versionInfos.Version

var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["flavor_infos"] = flavorInfos.Info
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
prepareCloudbrainTwoTrainSpecs(ctx)

configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
if err != nil {
@@ -880,6 +907,7 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
ctx.Data["WaitCount"] = waitCount
setMultiNodeIfConfigureMatch(ctx)

return nil
}
@@ -942,14 +970,12 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
}
ctx.Data["engine_versions"] = versionInfos.Version

var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
prepareCloudbrainTwoTrainSpecs(ctx)
spec, _ := resource.GetCloudbrainSpec(task.ID)
if spec != nil {
log.Info("spec_id = %d", spec.ID)
ctx.Data["spec_id"] = spec.ID
}
ctx.Data["flavor_infos"] = flavorInfos.Info

setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))

var Parameters modelarts.Parameters
if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil {
@@ -1040,13 +1066,7 @@ func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrai
}
ctx.Data["engine_versions"] = versionInfos.Version

var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["flavor_infos"] = flavorInfos.Info
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
prepareCloudbrainTwoTrainSpecs(ctx)

var Parameters modelarts.Parameters
if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil {
@@ -1099,7 +1119,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
workServerNumber := form.WorkServerNumber
engineID := form.EngineID
bootFile := strings.TrimSpace(form.BootFile)
flavorCode := form.Flavor
params := form.Params
poolID := form.PoolID
//isSaveParam := form.IsSaveParam
@@ -1115,6 +1134,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
VersionCount := modelarts.VersionCountOne
EngineName := form.EngineName

errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
return
}

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -1145,10 +1171,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
return
}

errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
if errStr != "" {
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeTrain,
ComputeResource: models.NPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainTwo})
if err != nil || spec == nil {
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobNew, &form)
return
}
//Determine whether the task name of the task in the project is duplicated
@@ -1311,7 +1341,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
BootFileUrl: codeObsPath + bootFile,
BootFile: bootFile,
TrainUrl: outputObsPath,
FlavorCode: flavorCode,
WorkServerNumber: workServerNumber,
EngineID: int64(engineID),
LogUrl: logObsPath,
@@ -1327,6 +1356,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
VersionCount: VersionCount,
TotalVersionCount: modelarts.TotalVersionCount,
DatasetName: datasetNames,
Spec: spec,
}
userCommand, userImageUrl := getUserCommand(engineID, req)
req.UserCommand = userCommand
@@ -1349,6 +1379,48 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

// checkMultiNode validates the requested number of work servers for a user.
//
// It returns "" when the request is allowed and the locale key
// "repo.modelarts.no_node_right" otherwise. A single server is always
// allowed. Any other count is allowed only if some multi-node configuration
// entry belongs to an organization the user is a member of and lists that
// count in its Node values.
//
// The error from models.IsOrganizationMemberByOrgName is discarded; only the
// boolean result is considered.
func checkMultiNode(userId int64, serverNum int) string {
	if serverNum == 1 {
		return ""
	}
	modelarts.InitMultiNode()
	if modelarts.MultiNodeConfig != nil {
		for _, entry := range modelarts.MultiNodeConfig.Info {
			member, _ := models.IsOrganizationMemberByOrgName(entry.Org, userId)
			if member && isInNodes(entry.Node, serverNum) {
				return ""
			}
		}
	}
	return "repo.modelarts.no_node_right"
}
// checkInferenceJobMultiNode validates the number of work servers requested
// for an inference job. Only exactly one server is accepted: it returns ""
// for serverNum == 1 and the locale key "repo.modelarts.no_node_right" for
// every other value. userId is not consulted.
func checkInferenceJobMultiNode(userId int64, serverNum int) string {
	if serverNum != 1 {
		return "repo.modelarts.no_node_right"
	}
	return ""
}

// isInNodes reports whether num occurs in nodes. A nil or empty slice yields
// false.
func isInNodes(nodes []int, num int) bool {
	for i := 0; i < len(nodes); i++ {
		if nodes[i] == num {
			return true
		}
	}
	return false
}

func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, string) {
userImageUrl := ""
userCommand := ""
@@ -1383,6 +1455,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.Data["PageIsTrainJob"] = true
var jobID = ctx.Params(":jobid")

errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
versionErrorDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
return
}

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -1412,7 +1491,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
workServerNumber := form.WorkServerNumber
engineID := form.EngineID
bootFile := strings.TrimSpace(form.BootFile)
flavorCode := form.Flavor
params := form.Params
poolID := form.PoolID
//isSaveParam := form.IsSaveParam
@@ -1450,10 +1528,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
return
}

errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
if errStr != "" {
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeTrain,
ComputeResource: models.NPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainTwo})
if err != nil || spec == nil {
versionErrorDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobVersionNew, &form)
return
}

@@ -1607,7 +1689,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
BootFileUrl: codeObsPath + bootFile,
BootFile: bootFile,
TrainUrl: outputObsPath,
FlavorCode: flavorCode,
WorkServerNumber: workServerNumber,
IsLatestVersion: isLatestVersion,
EngineID: int64(engineID),
@@ -1624,6 +1705,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
PreVersionName: PreVersionName,
TotalVersionCount: latestTask.TotalVersionCount + 1,
DatasetName: datasetNames,
Spec: spec,
}
userCommand, userImageUrl := getUserCommand(engineID, req)
req.UserCommand = userCommand
@@ -1719,10 +1801,6 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
return errors.New("启动文件必须是python文件")
}

if form.WorkServerNumber > 2 || form.WorkServerNumber < 1 {
log.Error("the WorkServerNumber(%d) must be in (1,2)", form.WorkServerNumber)
return errors.New("计算节点数必须在1-2之间")
}
if form.BranchName == "" {
log.Error("the branch must not be null!", form.BranchName)
return errors.New("代码分支不能为空!")
@@ -1811,7 +1889,6 @@ func TrainJobShow(ctx *context.Context) {
for i, task := range VersionListTasks {

var parameters models.Parameters

err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), &parameters)
if err != nil {
log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err)
@@ -1832,6 +1909,14 @@ func TrainJobShow(ctx *context.Context) {
datasetList = append(datasetList, GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false))
VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)

//add spec
s, err := resource.GetCloudbrainSpec(task.Cloudbrain.ID)
if err != nil {
log.Error("TrainJobShow GetCloudbrainSpec error:" + err.Error())
continue
}
VersionListTasks[i].Cloudbrain.Spec = s
}

pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5)
@@ -1999,7 +2084,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
workServerNumber := form.WorkServerNumber
engineID := form.EngineID
bootFile := strings.TrimSpace(form.BootFile)
flavorCode := form.Flavor
params := form.Params
poolID := form.PoolID
repo := ctx.Repo.Repository
@@ -2021,6 +2105,13 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ckptUrl := "/" + form.TrainUrl + form.CkptName
log.Info("ckpt url:" + ckptUrl)

errStr := checkInferenceJobMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
return
}

count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -2069,13 +2160,16 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
}
}

errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeInference))
if errStr != "" {
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeInference,
ComputeResource: models.NPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainTwo})
if err != nil || spec == nil {
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
ctx.RenderWithErr("Resource specification not available", tplModelArtsInferenceJobNew, &form)
return
}

//todo: del the codeLocalPath
_, err = ioutil.ReadDir(codeLocalPath)
if err == nil {
@@ -2127,7 +2221,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid)
if err != nil {
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form)
return
}
dataPath := dataUrl
@@ -2183,7 +2277,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
BootFileUrl: codeObsPath + bootFile,
BootFile: bootFile,
TrainUrl: trainUrl,
FlavorCode: flavorCode,
WorkServerNumber: workServerNumber,
EngineID: int64(engineID),
LogUrl: logObsPath,
@@ -2203,6 +2296,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ModelVersion: modelVersion,
CkptName: ckptName,
ResultUrl: resultObsPath,
Spec: spec,
DatasetName: datasetNames,
}

@@ -2245,7 +2339,7 @@ func checkModelArtsSpecialPool(ctx *context.Context, flavorCode string, jobType
if !isMatchPool {
isMatchSpec := false
if jobType == string(models.JobTypeDebug) {
for _, flavor := range modelarts.FlavorInfos.FlavorInfo {
for _, flavor := range setting.StFlavorInfo.FlavorInfo {
if flavor.Value == flavorCode {
isMatchSpec = true
break
@@ -2383,14 +2477,7 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error {
}
ctx.Data["engine_versions"] = versionInfos.Version

var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}

ctx.Data["flavor_infos"] = flavorInfos.Info
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeInference))
prepareCloudbrainTwoInferenceSpecs(ctx)

ctx.Data["params"] = ""
ctx.Data["branchName"] = ctx.Repo.BranchName
@@ -2421,6 +2508,16 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error {
return nil
}

// prepareCloudbrainTwoInferenceSpecs stores in ctx.Data["Specs"] the NPU
// inference-job specifications available to the current user on the OpenI
// cluster in the models.AICenterOfCloudBrainTwo AI center.
//
// A lookup failure is logged instead of being silently dropped; the value
// returned by the lookup is still stored, so ctx.Data is populated exactly as
// before.
func prepareCloudbrainTwoInferenceSpecs(ctx *context.Context) {
	specs, err := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
		JobType:         models.JobTypeInference,
		ComputeResource: models.NPU,
		Cluster:         models.OpenICluster,
		AiCenterCode:    models.AICenterOfCloudBrainTwo,
	})
	if err != nil {
		log.Error("prepareCloudbrainTwoInferenceSpecs FindAvailableSpecs failed: %v", err)
	}
	ctx.Data["Specs"] = specs
}

func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) error {
ctx.Data["PageIsCloudBrain"] = true

@@ -2455,14 +2552,7 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel
return err
}
ctx.Data["engine_versions"] = versionInfos.Version

var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["flavor_infos"] = flavorInfos.Info
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeInference))
prepareCloudbrainTwoInferenceSpecs(ctx)

configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
if err != nil {
@@ -2537,7 +2627,7 @@ func InferenceJobShow(ctx *context.Context) {
} else {
task.Parameters = ""
}
prepareSpec4Show(ctx, task)
LabelName := strings.Fields(task.LabelName)
ctx.Data["labelName"] = LabelName
ctx.Data["jobID"] = jobID


+ 1
- 0
routers/response/response_list.go View File

@@ -2,3 +2,4 @@ package response

var RESOURCE_QUEUE_NOT_AVAILABLE = &BizError{Code: 1001, Err: "resource queue not available"}
var SPECIFICATION_NOT_EXIST = &BizError{Code: 1002, Err: "specification not exist"}
var SPECIFICATION_NOT_AVAILABLE = &BizError{Code: 1003, Err: "specification not available"}

+ 1
- 3
routers/user/home.go View File

@@ -836,14 +836,12 @@ func Cloudbrains(ctx *context.Context) {
ctx.ServerError("Get job failed:", err)
return
}
models.LoadSpecs4CloudbrainInfo(ciTasks)
for i, task := range ciTasks {
ciTasks[i].CanDebug = true
ciTasks[i].CanDel = true
ciTasks[i].Cloudbrain.ComputeResource = task.ComputeResource
ciTasks[i].Cloudbrain.AiCenter = repo.GetCloudbrainAiCenter(task.Cloudbrain, ctx)
_, cardType, _ := repo.GetCloudbrainCardNumAndType(task.Cloudbrain)
ciTasks[i].Cloudbrain.CardType = cardType
ciTasks[i].Cloudbrain.Cluster = repo.GetCloudbrainCluster(task.Cloudbrain, ctx)

}


+ 470
- 1
services/cloudbrain/resource/resource_specification.go View File

@@ -2,12 +2,19 @@ package resource

import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/admin/operate_log"
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
"time"
)

func AddResourceSpecification(doerId int64, req models.ResourceSpecificationReq) error {
@@ -92,6 +99,7 @@ func SyncGrampusSpecs(doerId int64) error {
GPUMemGiB: gpuMemGiB,
Status: models.SpecNotVerified,
IsAutomaticSync: true,
IsAvailable: true,
CreatedBy: doerId,
UpdatedBy: doerId,
})
@@ -103,6 +111,7 @@ func SyncGrampusSpecs(doerId int64) error {
CpuCores: spec.SpecInfo.CpuCoreNum,
MemGiB: memGiB,
GPUMemGiB: gpuMemGiB,
IsAvailable: true,
UpdatedBy: doerId,
})
}
@@ -142,7 +151,9 @@ func ResourceSpecOnShelf(doerId int64, id int64, unitPrice int) *response.BizErr
if q, err := models.GetResourceQueue(&models.ResourceQueue{ID: spec.QueueId}); err != nil || q == nil {
return response.RESOURCE_QUEUE_NOT_AVAILABLE
}

if !spec.IsAvailable {
return response.SPECIFICATION_NOT_AVAILABLE
}
err = models.ResourceSpecOnShelf(id, unitPrice)
if err != nil {
return response.NewBizError(err)
@@ -184,3 +195,461 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod
Comment: comment,
})
}

// FindAvailableSpecs returns the specifications matching opts that userId is
// allowed to use.
//
// The raw query result is first stripped of exclusive specifications the user
// has no access to, then collapsed so that each non-empty SourceSpecId appears
// only once. A query failure is logged and returned with a nil slice.
func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) {
	found, err := models.FindSpecs(opts)
	if err != nil {
		log.Error("FindAvailableSpecs error.%v", err)
		return nil, err
	}
	// Filter exclusive specs first, then de-duplicate by source spec id.
	return distinctSpecs(filterExclusiveSpecs(found, userId)), nil
}

// filterExclusiveSpecs keeps the specifications in r that userId may use and
// drops repeated IDs, preserving the input order.
//
// A non-exclusive specification is always kept. An exclusive one is kept only
// when the user is a member of at least one organization named in its
// ";"-separated ExclusiveOrg list. Membership lookup errors are ignored, so a
// failed lookup counts as "not a member".
func filterExclusiveSpecs(r []*models.Specification, userId int64) []*models.Specification {
	kept := make([]*models.Specification, 0, len(r))
	seen := make(map[int64]struct{})
	for _, candidate := range r {
		if _, dup := seen[candidate.ID]; dup {
			continue
		}
		allowed := !candidate.IsExclusive
		if !allowed {
			for _, orgName := range strings.Split(candidate.ExclusiveOrg, ";") {
				if member, _ := models.IsOrganizationMemberByOrgName(orgName, userId); member {
					allowed = true
					break
				}
			}
		}
		if allowed {
			kept = append(kept, candidate)
			seen[candidate.ID] = struct{}{}
		}
	}
	return kept
}

// distinctSpecs removes specifications whose SourceSpecId has already been
// seen, keeping the first occurrence and the original order. Entries with an
// empty SourceSpecId are never treated as duplicates.
func distinctSpecs(r []*models.Specification) []*models.Specification {
	unique := make([]*models.Specification, 0, len(r))
	seenSource := make(map[string]struct{})
	for _, item := range r {
		if id := item.SourceSpecId; id != "" {
			if _, dup := seenSource[id]; dup {
				continue
			}
			seenSource[id] = struct{}{}
		}
		unique = append(unique, item)
	}
	return unique
}

// GetAndCheckSpec resolves specId to a specification that userId may use
// under the constraints in opts.
//
// It returns (nil, nil) when specId is zero or when no available
// specification matches; callers must check the result for nil.
func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) (*models.Specification, error) {
	if specId == 0 {
		return nil, nil
	}
	opts.SpecId = specId
	matches, err := FindAvailableSpecs(userId, opts)
	switch {
	case err != nil:
		return nil, err
	case len(matches) == 0:
		return nil, nil
	default:
		return matches[0], nil
	}
}

// InsertCloudbrainSpec stores a copy of specification s for the task
// identified by cloudbrainId. The fields are copied one by one into a
// models.CloudbrainSpec record; s must not be nil.
//
// An insert failure is logged and returned.
func InsertCloudbrainSpec(cloudbrainId int64, s *models.Specification) error {
	record := models.CloudbrainSpec{
		CloudbrainID:    cloudbrainId,
		SpecId:          s.ID,
		SourceSpecId:    s.SourceSpecId,
		AccCardsNum:     s.AccCardsNum,
		AccCardType:     s.AccCardType,
		CpuCores:        s.CpuCores,
		MemGiB:          s.MemGiB,
		GPUMemGiB:       s.GPUMemGiB,
		ShareMemGiB:     s.ShareMemGiB,
		ComputeResource: s.ComputeResource,
		UnitPrice:       s.UnitPrice,
		QueueId:         s.QueueId,
		QueueCode:       s.QueueCode,
		Cluster:         s.Cluster,
		AiCenterCode:    s.AiCenterCode,
		AiCenterName:    s.AiCenterName,
		IsExclusive:     s.IsExclusive,
		ExclusiveOrg:    s.ExclusiveOrg,
	}
	if _, err := models.InsertCloudbrainSpec(record); err != nil {
		log.Error("InsertCloudbrainSpec error.CloudbrainSpec=%v. err=%v", record, err)
		return err
	}
	return nil
}

// GetCloudbrainSpec loads the specification record stored for the given task
// and converts it to a models.Specification.
//
// It returns (nil, nil) when no record exists for cloudbrainId.
func GetCloudbrainSpec(cloudbrainId int64) (*models.Specification, error) {
	snapshot, err := models.GetCloudbrainSpecByID(cloudbrainId)
	if err != nil || snapshot == nil {
		// err is nil in the "not found" case, so this yields (nil, nil).
		return nil, err
	}
	return snapshot.ConvertToSpecification(), nil
}

// RefreshHistorySpec back-fills specification records for historic tasks.
//
// When scopeAll is false only the tasks listed in ids (including deleted
// ones) are processed. When scopeAll is true every task returned by
// models.FindCloudbrainTask is visited page by page, and tasks that already
// have a stored specification are skipped.
//
// It returns (total, success, err): total is the number of requested ids, or
// the count reported by models.CountNoSpecHistoricTask in scopeAll mode;
// success is the number of tasks whose specification was stored. Failures on
// individual tasks are logged and do not abort the run.
func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) {
	var success int64
	var total int64

	if !scopeAll {
		if len(ids) == 0 {
			return 0, 0, nil
		}
		total = int64(len(ids))
		tasks, err := models.GetCloudbrainWithDeletedByIDs(ids)
		if err != nil {
			return total, 0, err
		}
		for _, task := range tasks {
			if err := RefreshOneHistorySpec(task); err != nil {
				log.Error("RefreshOneHistorySpec error.%v", err)
				continue
			}
			success++
		}
		return total, success, nil
	}

	const (
		pageSize = 100
		// maxPages bounds the scan so a misbehaving query cannot loop forever.
		maxPages = 500
	)
	n, err := models.CountNoSpecHistoricTask()
	if err != nil {
		log.Error("FindNoSpecHistoricTask CountNoSpecHistoricTask error. e=%v", err)
		return 0, 0, err
	}
	total = n
	for page := 1; page <= maxPages; page++ {
		list, err := models.FindCloudbrainTask(page, pageSize)
		if err != nil {
			// Log the page that was actually requested.
			log.Error("FindCloudbrainTask error.page=%d pageSize=%d e=%v", page, pageSize, err)
			return total, success, err
		}
		if len(list) == 0 {
			log.Info("RefreshHistorySpec. list is empty")
			break
		}
		for _, task := range list {
			existing, err := GetCloudbrainSpec(task.ID)
			if err != nil {
				log.Error("RefreshHistorySpec GetCloudbrainSpec error.%v", err)
				continue
			}
			if existing != nil {
				// Already has a specification record; nothing to do.
				continue
			}
			if err := RefreshOneHistorySpec(task); err != nil {
				log.Error("RefreshOneHistorySpec error.%v", err)
				continue
			}
			success++
		}
		if len(list) < pageSize {
			log.Info("RefreshHistorySpec. list < pageSize")
			break
		}
	}
	return total, success, nil
}

// RefreshOneHistorySpec determines the specification a historic task ran with
// and stores it via InsertCloudbrainSpec.
//
// The lookup strategy depends on task.Type (CloudBrain One, CloudBrain Two or
// C2Net). An error is returned when the lookup fails, when the task type is
// not one of those three, or when no specification could be determined.
func RefreshOneHistorySpec(task *models.Cloudbrain) error {
	var resolve func(*models.Cloudbrain) (*models.Specification, error)
	switch task.Type {
	case models.TypeCloudBrainOne:
		resolve = getCloudbrainOneSpec
	case models.TypeCloudBrainTwo:
		resolve = getCloudbrainTwoSpec
	case models.TypeC2Net:
		resolve = getGrampusSpec
	}
	if resolve == nil {
		// Unknown task type: there is nothing to look up.
		log.Error("find spec failed,task.ID=%d", task.ID)
		return errors.New("find spec failed")
	}

	spec, err := resolve(task)
	if err != nil {
		log.Error("find spec error,task.ID=%d err=%v", task.ID, err)
		return err
	}
	if spec == nil {
		log.Error("find spec failed,task.ID=%d", task.ID)
		return errors.New("find spec failed")
	}
	return InsertCloudbrainSpec(task.ID, spec)
}

// getCloudbrainOneSpec resolves the specification of a CloudBrain One task.
//
// Tasks without a GPU queue yield (nil, nil). Otherwise the locally
// configured resource specs are consulted first, and the remote job
// description is queried only when the configuration has no match.
func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error) {
	if task.GpuQueue == "" {
		log.Info("gpu queue is empty.task.ID = %d", task.ID)
		return nil, nil
	}

	// Prefer the configuration; it needs no remote call.
	fromConfig, err := findCloudbrainOneSpecFromConfig(task)
	switch {
	case err != nil:
		log.Error("getCloudbrainOneSpec findCloudbrainOneSpecFromConfig error.%v", err)
		return nil, err
	case fromConfig != nil:
		return fromConfig, nil
	}

	// Fall back to the remote job description.
	return findCloudbrainOneSpecFromRemote(task)
}

// findCloudbrainOneSpecFromRemote derives the specification of a CloudBrain
// One task from the job description returned by cloudbrain.GetJob.
//
// The resources reported by the job (cards, CPU, memory, shared memory) are
// used to search the existing specifications. Exactly one match is returned
// as-is; no match triggers InitQueueAndSpec to create an off-shelf
// specification; several matches are treated as ambiguous and yield
// (nil, nil).
//
// Side effects: sleeps 200ms before the remote call, and rewrites
// task.ComputeResource from "CPU/GPU" to models.GPU.
func findCloudbrainOneSpecFromRemote(task *models.Cloudbrain) (*models.Specification, error) {
	// Pause before each remote call (presumably to throttle batch refreshes).
	time.Sleep(200 * time.Millisecond)
	log.Info("start findCloudbrainOneSpecFromRemote")
	result, err := cloudbrain.GetJob(task.JobID)
	if err != nil {
		log.Error("getCloudbrainOneSpec error. %v", err)
		return nil, err
	}

	if result == nil {
		log.Info("findCloudbrainOneSpecFromRemote failed,result is empty.task.ID=%d", task.ID)
		return nil, nil
	}
	jobRes, err := models.ConvertToJobResultPayload(result.Payload)
	if err != nil {
		// Without a valid payload every resource value would be zero, which
		// could match or even create a bogus specification below.
		log.Error("findCloudbrainOneSpecFromRemote ConvertToJobResultPayload error.task.ID=%d err=%v", task.ID, err)
		return nil, err
	}
	// A parse failure is tolerated: memSize is then not positive and the
	// memory filter is switched off through UseMemGiB below.
	memSize, _ := models.ParseMemSizeFromGrampus(jobRes.Resource.Memory)
	if task.ComputeResource == "CPU/GPU" {
		task.ComputeResource = models.GPU
	}
	var shareMemGiB float32
	if len(jobRes.Config.TaskRoles) > 0 {
		shmMB := jobRes.Config.TaskRoles[0].ShmMB
		switch shmMB {
		case 103600:
			// Stored as 103600 MB but treated as a round 100 GiB.
			shareMemGiB = 100
		case 51800:
			// Stored as 51800 MB but treated as a round 50 GiB.
			shareMemGiB = 50
		default:
			shareMemGiB = float32(shmMB) / 1024
		}
	}
	opt := models.FindSpecsOptions{
		ComputeResource: task.ComputeResource,
		Cluster:         models.OpenICluster,
		AiCenterCode:    models.AICenterOfCloudBrainOne,
		QueueCode:       task.GpuQueue,
		AccCardsNum:     jobRes.Resource.NvidiaComGpu,
		UseAccCardsNum:  true,
		CpuCores:        jobRes.Resource.CPU,
		UseCpuCores:     true,
		MemGiB:          memSize,
		UseMemGiB:       memSize > 0,
		ShareMemGiB:     shareMemGiB,
		UseShareMemGiB:  shareMemGiB > 0,
		RequestAll:      true,
	}
	specs, err := models.FindSpecs(opt)
	if err != nil {
		log.Error("getCloudbrainOneSpec from remote error,%v", err)
		return nil, err
	}
	switch len(specs) {
	case 1:
		return specs[0], nil
	case 0:
		s, err := InitQueueAndSpec(opt, "云脑一", "处理历史云脑任务时自动添加")
		if err != nil {
			// Creation failures are reported as "no spec found".
			log.Error("getCloudbrainOneSpec InitQueueAndSpec error.err=%v", err)
			return nil, nil
		}
		return s, nil
	}
	log.Error("Too many results matched.size=%d opt=%+v", len(specs), opt)
	return nil, nil
}

// findCloudbrainOneSpecFromConfig derives the specification of a CloudBrain
// One task from the resource specs in the application settings.
//
// The spec list is chosen by task.JobType (train, inference, or the default
// list for everything else) and parsed lazily into the corresponding
// cloudbrain package variable. When that list has no entry with
// task.ResourceSpecId, the special pools are searched as well. A setting that
// cannot be parsed is logged and treated as having no entries.
//
// The configured resources are then matched against the stored
// specifications: one match is returned, no match triggers InitQueueAndSpec
// to create an off-shelf specification, and several matches yield (nil, nil).
// (nil, nil) is also returned when the configuration has no such spec id.
//
// Side effect: rewrites task.ComputeResource from "CPU/GPU" to models.GPU.
func findCloudbrainOneSpecFromConfig(task *models.Cloudbrain) (*models.Specification, error) {
	var specConfig *models.ResourceSpec

	switch task.JobType {
	case string(models.JobTypeTrain):
		if cloudbrain.TrainResourceSpecs == nil {
			if err := json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs); err != nil {
				log.Error("findCloudbrainOneSpecFromConfig unmarshal TrainResourceSpecs error.%v", err)
			}
		}
		// The pointer stays nil when the setting is empty or invalid.
		if cloudbrain.TrainResourceSpecs != nil {
			for _, tmp := range cloudbrain.TrainResourceSpecs.ResourceSpec {
				if tmp.Id == task.ResourceSpecId {
					specConfig = tmp
					break
				}
			}
		}
	case string(models.JobTypeInference):
		if cloudbrain.InferenceResourceSpecs == nil {
			if err := json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs); err != nil {
				log.Error("findCloudbrainOneSpecFromConfig unmarshal InferenceResourceSpecs error.%v", err)
			}
		}
		if cloudbrain.InferenceResourceSpecs != nil {
			for _, tmp := range cloudbrain.InferenceResourceSpecs.ResourceSpec {
				if tmp.Id == task.ResourceSpecId {
					specConfig = tmp
					break
				}
			}
		}
	default:
		if cloudbrain.ResourceSpecs == nil {
			if err := json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs); err != nil {
				log.Error("findCloudbrainOneSpecFromConfig unmarshal ResourceSpecs error.%v", err)
			}
		}
		if cloudbrain.ResourceSpecs != nil {
			for _, tmp := range cloudbrain.ResourceSpecs.ResourceSpec {
				if tmp.Id == task.ResourceSpecId {
					specConfig = tmp
					break
				}
			}
		}
	}

	if specConfig == nil && cloudbrain.SpecialPools != nil {
		for _, specialPool := range cloudbrain.SpecialPools.Pools {
			for _, spec := range specialPool.ResourceSpec {
				if task.ResourceSpecId == spec.Id {
					specConfig = spec
					// Only leaves the current pool; later pools are still scanned.
					break
				}
			}
		}
	}
	if specConfig == nil {
		log.Error("getCloudbrainOneSpec from config failed,task.ResourceSpecId=%d", task.ResourceSpecId)
		return nil, nil
	}
	if task.ComputeResource == "CPU/GPU" {
		task.ComputeResource = models.GPU
	}

	var shareMemGiB float32
	switch specConfig.ShareMemMiB {
	case 103600:
		// Configured as 103600 MiB but treated as a round 100 GiB.
		shareMemGiB = 100
	case 51800:
		// Configured as 51800 MiB but treated as a round 50 GiB.
		shareMemGiB = 50
	default:
		shareMemGiB = float32(specConfig.ShareMemMiB) / 1024
	}
	opt := models.FindSpecsOptions{
		JobType:         models.JobType(task.JobType),
		ComputeResource: task.ComputeResource,
		Cluster:         models.OpenICluster,
		AiCenterCode:    models.AICenterOfCloudBrainOne,
		QueueCode:       task.GpuQueue,
		AccCardsNum:     specConfig.GpuNum,
		UseAccCardsNum:  true,
		CpuCores:        specConfig.CpuNum,
		UseCpuCores:     true,
		MemGiB:          float32(specConfig.MemMiB) / 1024,
		UseMemGiB:       true,
		ShareMemGiB:     shareMemGiB,
		UseShareMemGiB:  true,
		RequestAll:      true,
	}
	specs, err := models.FindSpecs(opt)
	if err != nil {
		log.Error("getCloudbrainOneSpec from config error,%v", err)
		return nil, err
	}
	if len(specs) > 1 {
		log.Error("Too many results matched.size=%d opt=%+v", len(specs), opt)
		return nil, nil
	}
	if len(specs) == 0 {
		s, err := InitQueueAndSpec(opt, "云脑一", "处理历史云脑任务时自动添加")
		if err != nil {
			// Creation failures are reported as "no spec found".
			log.Error("getCloudbrainOneSpec InitQueueAndSpec error.err=%v", err)
			return nil, nil
		}
		return s, nil
	}
	return specs[0], nil
}

// getCloudbrainTwoSpec resolves the specification of a CloudBrain Two task by
// flavor code.
//
// The flavor recorded on the task is used when present. Otherwise the flavor
// is fetched remotely: from the notebook for debug jobs, from the train job
// for train and inference jobs. The remote path sleeps 200ms before the call.
// The result is nil when the flavor is not in the map returned by
// models.GetCloudbrainTwoSpecs, or when the job type is none of the above.
func getCloudbrainTwoSpec(task *models.Cloudbrain) (*models.Specification, error) {
	specMap, err := models.GetCloudbrainTwoSpecs()
	if err != nil {
		log.Error("InitCloudbrainTwoSpecs err.%v", err)
		return nil, err
	}
	if task.FlavorCode != "" {
		return specMap[task.FlavorCode], nil
	}

	// Pause before each remote call (presumably to throttle batch refreshes).
	time.Sleep(200 * time.Millisecond)
	log.Info("start getCloudbrainTwoSpec FromRemote")
	switch task.JobType {
	case string(models.JobTypeDebug):
		result, err := modelarts.GetNotebook2(task.JobID)
		if err != nil {
			log.Error("getCloudbrainTwoSpec GetNotebook2 error.%v", err)
			return nil, err
		}
		if result != nil {
			return specMap[result.Flavor], nil
		}
	case string(models.JobTypeTrain), string(models.JobTypeInference):
		result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
		if err != nil {
			// One verb per argument, so the error itself is actually printed.
			log.Error("getCloudbrainTwoSpec GetTrainJob error. jobName=%s err=%v", task.JobName, err)
			return nil, err
		}
		if result != nil {
			return specMap[result.Flavor.Code], nil
		}
	}
	return nil, nil
}

// getGrampusSpec resolves the specification of a C2Net (Grampus) task from
// the map returned by models.GetGrampusSpecs.
//
// When the task records an AI centre, the key "<FlavorCode>_<centre>" is
// tried first, where <centre> is the part of task.AiCenter before the first
// "+". The plain FlavorCode key is the fallback; the result may be nil.
func getGrampusSpec(task *models.Cloudbrain) (*models.Specification, error) {
	byCode, err := models.GetGrampusSpecs()
	if err != nil {
		log.Error("GetGrampusSpecs err.%v", err)
		return nil, err
	}
	if task.AiCenter != "" {
		centerCode := strings.Split(task.AiCenter, "+")[0]
		if centerSpec := byCode[task.FlavorCode+"_"+centerCode]; centerSpec != nil {
			return centerSpec, nil
		}
	}
	return byCode[task.FlavorCode], nil
}

// InitQueueAndSpec builds a resource queue and a resource specification from
// the search options opt and hands both to models.InitQueueAndSpec.
//
// The queue takes its code, cluster, AI centre and compute resource from opt,
// plus the given aiCenterName and remark; its card type comes from
// models.GetCloudbrainOneAccCardType(opt.QueueCode). The specification copies
// the resource amounts from opt and is marked off-shelf and available.
func InitQueueAndSpec(opt models.FindSpecsOptions, aiCenterName string, remark string) (*models.Specification, error) {
	queue := models.ResourceQueue{
		QueueCode:       opt.QueueCode,
		Cluster:         opt.Cluster,
		AiCenterCode:    opt.AiCenterCode,
		AiCenterName:    aiCenterName,
		ComputeResource: opt.ComputeResource,
		AccCardType:     models.GetCloudbrainOneAccCardType(opt.QueueCode),
		Remark:          remark,
	}
	spec := models.ResourceSpecification{
		AccCardsNum: opt.AccCardsNum,
		CpuCores:    opt.CpuCores,
		MemGiB:      opt.MemGiB,
		GPUMemGiB:   opt.GPUMemGiB,
		ShareMemGiB: opt.ShareMemGiB,
		Status:      models.SpecOffShelf,
		IsAvailable: true,
	}
	return models.InitQueueAndSpec(queue, spec)
}

+ 12
- 4
templates/admin/cloudbrain/list.tmpl View File

@@ -1,4 +1,5 @@
{{template "base/head" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<!-- 弹窗 -->
<div id="mask">
<div id="loadingPage">
@@ -175,10 +176,17 @@
</div>
<!-- XPU类型 -->
<div class="one wide column text center nowrap" style="width:8% !important;">
<span style="font-size: 12px;" title="{{.CardType}}">
{{if .CardType}}{{.CardType}}{{else}}--{{end}}
</span>
</div>
<span style="font-size: 12px;" title="" class="card_type_{{.DisplayJobName}}_{{$JobID}}"></span>
</div>
<script>
(function(){
var spec = {{.Spec}} || {};
var cardType = getListValueWithKey(ACC_CARD_TYPE, spec.AccCardType) || '--';
var spanEl = document.querySelector('.card_type_{{.DisplayJobName}}_{{$JobID}}');
spanEl.setAttribute('title', cardType);
spanEl.innerText = cardType;
})();
</script>
<!-- 创建者 -->
<div class="one wide column text center nowrap" style="width:4% !important;">
{{if .User.Name}}


+ 1
- 1
templates/admin/resources/queue.tmpl View File

@@ -4,7 +4,7 @@
{{template "admin/navbar" .}}
<div class="ui container">
<div id="__vue-root"></div>
</duv>
</div>
</div>
<script src="{{StaticUrlPrefix}}/js/vp-resources-queue.js?v={{MD5 AppVer}}"></script>
{{template "base/footer" .}}

+ 1
- 1
templates/admin/resources/scene.tmpl View File

@@ -4,7 +4,7 @@
{{template "admin/navbar" .}}
<div class="ui container">
<div id="__vue-root"></div>
</duv>
</div>
</div>
<script src="{{StaticUrlPrefix}}/js/vp-resources-scene.js?v={{MD5 AppVer}}"></script>
{{template "base/footer" .}}

+ 1
- 1
templates/admin/resources/specification.tmpl View File

@@ -4,7 +4,7 @@
{{template "admin/navbar" .}}
<div class="ui container">
<div id="__vue-root"></div>
</duv>
</div>
</div>
<script src="{{StaticUrlPrefix}}/js/vp-resources-specification.js?v={{MD5 AppVer}}"></script>
{{template "base/footer" .}}

+ 25
- 0
templates/custom/task_wait_count.tmpl View File

@@ -0,0 +1,25 @@
<!-- Waiting-task hint: renders .WaitCount and, when .QueuesDetail is set, replaces the number with queuesDetail[queueCode] (or 0) for the option currently selected in select#__specs__, on every change and once 50ms after load. -->
<div style="display:inline-block;">
<div style="display:flex;align-items:center;color:#f2711c;">
<i class="ri-error-warning-line" style="margin-right: 0.5rem; font-size: 14px"></i>
<span style="font-size: 12px">{{.i18n.Tr "repo.wait_count_start"}} <span class="__task_wait_count__">{{.WaitCount}}</span> {{.i18n.Tr "repo.wait_count_end"}}</span>
</div>
</div>
<script>
;(function() {
var queuesDetail = {{.QueuesDetail}};
if (queuesDetail) {
function changeSpecs() {
var specsSelEl = $('select#__specs__');
var seldOption = specsSelEl.find('option:selected');
var queueCode = seldOption.attr('queueCode');
$('span.__task_wait_count__').text(queuesDetail[queueCode] || 0);
};
$('body').on('change', 'select#__specs__', function(e) {
changeSpecs();
});
setTimeout(function() {
changeSpecs();
}, 50);
}
})();
</script>

+ 49
- 9
templates/repo/cloudbrain/benchmark/new.tmpl View File

@@ -51,9 +51,12 @@
<a class="active item model_benchmark"
href="{{.Link}}?benchmarkMode=model">{{.i18n.Tr "repo.cloudbrain.benchmark.model"}}</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $ "type" .benchmark_gpu_types}}
</div>
<div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 80%;" name="display_job_name" id="trainjob_job_name"
@@ -71,7 +74,7 @@
onkeyup="this.value=this.value.substring(0, 255)">{{.description}}</textarea>
</div>

<div class="required min_title inline field">
<!--<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.gpu_type"}}</label>
<select id="cloudbrain_gpu_type" class="ui search dropdown width48" placeholder="选择GPU类型"
name="gpu_type">
@@ -79,7 +82,7 @@
<option value="{{.Queue}}">{{.Value}}</option>
{{end}}
</select>
</div>
</div>-->
<div class="required unite min_title two inline fields" style="margin-left: 80px;">
<div class="required ten wide field" style="width: 26.5% !important;">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.task_type"}}</label>&nbsp;
@@ -112,7 +115,7 @@
<div id="images-new-cb">
</div>
{{template "custom/select_dataset_train" .}}
<div class="required min_title inline field" style="margin-top:2rem;">
<!--<div class="required min_title inline field" style="margin-top:2rem;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="cloudbrain_resource_spec" class="ui search dropdown"
placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" style='width:385px'
@@ -123,6 +126,13 @@
</option>
{{end}}
</select>
</div>-->
<div class="required min_title inline field" style="margin-top:2rem;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="__specs__" class="ui search dropdown width48"
placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" style='width:385px' ovalue="{{.spec_id}}"
name="spec_id">
</select>
</div>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;"></label>
@@ -146,10 +156,13 @@
<a class="item model_benchmark"
href="{{.Link}}?benchmarkMode=model">{{.i18n.Tr "repo.cloudbrain.benchmark.model"}}</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $ "type" .benchmark_gpu_types}}
</div>

<div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 80%;" name="display_job_name" id="trainjob_job_name"
@@ -167,7 +180,7 @@
onkeyup="this.value=this.value.substring(0, 255)">{{.description}}</textarea>
</div>

<div class="required min_title inline field">
<!--<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.gpu_type"}}</label>
<select id="cloudbrain_gpu_type" class="ui search dropdown" placeholder="选择GPU类型"
style='width:385px' name="gpu_type">
@@ -175,7 +188,7 @@
<option value="{{.Queue}}">{{.Value}}</option>
{{end}}
</select>
</div>
</div>-->
<div class="required unite inline min_title fields" style="width: 90%;margin-left: 5.7rem;">&nbsp;
<div class="required eight wide field">
<label style="font-weight: normal;white-space: nowrap;">{{.i18n.Tr "repo.cloudbrain.benchmark.evaluate_type"}}</label>
@@ -201,7 +214,7 @@
<div id="images-new-cb">
</div>

<div class="required min_title inline field">
<!--<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="cloudbrain_resource_spec" class="ui search dropdown"
placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" style='width:385px'
@@ -212,6 +225,14 @@
</option>
{{end}}
</select>
</div>-->

<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="__specs__" class="ui search dropdown width48"
placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" style='width:385px' ovalue="{{.spec_id}}"
name="spec_id">
</select>
</div>

<div class="inline min_title field required">
@@ -245,7 +266,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
let form = document.getElementById('form_id');
let createFlag = false
@@ -304,6 +325,7 @@
}
}
}
var isValidate = false;
function validate() {
$('.ui.form')
.form({
@@ -327,12 +349,18 @@
}
]
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function () {
// $('.ui.page.dimmer').dimmer('show')
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function (e) {
isValidate = false;
return false;
}
})
@@ -342,4 +370,16 @@
$('.ui.create_train_job.green.button').click(function (e) {
validate()
})

;(function() {
var SPECS = {{ .benchmark_specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 59
- 19
templates/repo/cloudbrain/benchmark/show.tmpl View File

@@ -256,8 +256,9 @@
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item"
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item" data-tab="second{{$k}}"
onclick="loadLog({{.VersionName}})">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item log_bottom" data-tab="second{{$k}}"
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
@@ -453,7 +454,7 @@
{{$.i18n.Tr "cloudbrain.gpu_type"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content resorce_type">
<div class="text-span text-span-w">
{{$.resource_type}}
</div>
@@ -464,9 +465,9 @@
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content spec">
<div class="text-span text-span-w">
{{$.resource_spec}}
{{$.resource_spec}}
</div>
</td>
</tr>
@@ -528,19 +529,42 @@
</div>
<div class="ui tab" data-tab="second{{$k}}">
<div>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log" id="log{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>

</div>

</div>
<a id="{{.VersionName}}-log-down"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}'
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>
</a>
</div>
<div
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<span>
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;"
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a>
</span>
<span class="log-info-{{.VersionName}}">
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;"
class="log_bottom" data-version="{{.VersionName}}"><i
class="icon-to-bottom"></i></a>
</span>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>
</div>
</div>

</div>
</div>
@@ -571,7 +595,22 @@

</div>
{{template "base/footer" .}}

<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
;(function() {
var SPEC = {{ $.Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
$('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>
<script>
$('.menu .item').tab()

@@ -606,4 +645,5 @@
});
}

</script>

+ 37
- 11
templates/repo/cloudbrain/inference/new.tmpl View File

@@ -82,8 +82,11 @@
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $ "type" .inference_gpu_types}}
<div style="display: flex;align-items: center;margin-left: 155px;margin-top: 0.5rem;">
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_infer_gpu_tooltips" "/dataset" "/model" "/result" | Safe}}</span>
</div>
@@ -140,7 +143,7 @@
<span >
<i class="question circle icon" data-content="{{.i18n.Tr "cloudbrain.model_file_postfix_rule"}}" data-position="top center" data-variation="inverted mini"></i>
</span>
</div>
<!-- AI引擎 -->
<div id="images-new-cb">
@@ -168,7 +171,7 @@
</select>
</div>
<!-- GPU 卡的类型 -->
<div class="required min_title inline field">
<!--<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.gpu_type"}}</label>
<select id="cloudbrain_gpu_type" class="ui search width48 dropdown gpu-type" placeholder="选择GPU类型"
style='width:385px' name="gpu_type">
@@ -189,7 +192,7 @@
{{end}}
{{end}}
</select>
</div>
</div>-->
<!-- 数据集-->
<div id="select-multi-dataset">

@@ -227,7 +230,7 @@
</select>
</div>
<!-- 规格 -->
<div class="required min_title inline field">
<!--<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="cloudbrain_resource_spec" class="ui search dropdown width80" placeholder="选择资源规格" name="resource_spec_id">
{{if .resource_spec_id}}
@@ -248,8 +251,12 @@
{{end}}
{{end}}
</select>
</div>-->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="__specs__" class="ui search dropdown width48" placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" ovalue="{{.spec_id}}" name="spec_id">
</select>
</div>
<!-- 表单操作 -->
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;"></label>
@@ -264,7 +271,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
let form = document.getElementById('form_id');
let createFlag = false
@@ -299,7 +306,7 @@
$('#select_model').removeClass("loading")
})
})
// 根据选中的模型名称获取相应的模型版本
function modelVersion(){
let faildModelVersion = $('#failed_model_version').val()
@@ -318,7 +325,7 @@
$("#select_model_version").removeClass("loading")
const initVersionText = $('#model_name_version div.item:first-child').text()
const initVersionValue = $('#model_name_version div.item:first-child').data('value')
if(faildModelVersion&&faildTrainUrl){
$("#select_model_version").dropdown('set text',faildModelVersion)
$("#select_model_version").dropdown('set value',faildTrainUrl,faildModelVersion,$('#model_name_version div.item:first-child'))
@@ -381,7 +388,7 @@
params&&params.parameter.forEach((item,index)=>{
Add_parameter(index,flag=true,item)
})
})
// 参数增加、删除、修改、保存
function Add_parameter(i,flag=false,paramsObject={}) {
@@ -436,6 +443,8 @@
msg = JSON.stringify(msg)
$('#store_run_para').val(msg)
}
var isValidate = false;
function validate(){
$('.ui.form')
.form({
@@ -489,12 +498,18 @@
type: 'empty',
}
]
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function(){
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function(e){
isValidate = false;
return false;
}
})
@@ -509,4 +524,15 @@
send_run_para();
validate();
})
;(function() {
var SPECS = {{ .inference_specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 58
- 6
templates/repo/cloudbrain/inference/show.tmpl View File

@@ -228,7 +228,7 @@
</h4>
{{with .task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/cloudbrain/inference-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}">
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}">
<div class="active title padding0">
<div class="according-panel-heading">
@@ -264,7 +264,8 @@
data-tab="first">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item" data-tab="second"
onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a>
<a class="item log_bottom" data-tab="third"
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item load-model-file" data-tab="four"
data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/inference-job/{{.JobID}}/result_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
@@ -340,7 +341,7 @@
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.resource_type"}}
</td>
<td class="ti-text-form-content">
<td class="ti-text-form-content resorce_type">
<div class="text-span text-span-w">
{{$.resource_type}}
</div>
@@ -482,7 +483,7 @@
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content spec">
<div class="text-span text-span-w">
{{$.i18n.Tr "cloudbrain.gpu_num"}}:{{$.GpuNum}},{{$.i18n.Tr "cloudbrain.cpu_num"}}:{{$.CpuNum}},{{$.i18n.Tr "cloudbrain.memory"}}(MB):{{$.MemMiB}},{{$.i18n.Tr "cloudbrain.shared_memory"}}(MB):{{$.ShareMemMiB}}
</div>
@@ -524,7 +525,7 @@
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log" id="log{{.VersionName}}"
<div class="ui attached"
style="height: 390px !important; overflow: auto;">
<input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}">
<input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}">
@@ -537,7 +538,44 @@

</div>

<div class="ui tab" data-tab="third">
<div>
<a id="{{.VersionName}}-log-down"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}'
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>
</a>
</div>
<div
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<span>
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;"
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a>
</span>
<span class="log-info-{{.VersionName}}">
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;"
class="log_bottom" data-version="{{.VersionName}}"><i
class="icon-to-bottom"></i></a>
</span>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>
</div>
</div>

<div class="ui tab" data-tab="four">
<input type="hidden" name="model{{.VersionName}}" value="-1">
@@ -561,6 +599,7 @@

</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<script>
$('.menu .item').tab()
@@ -621,4 +660,17 @@
document.getElementById("info_display").innerHTML = html;
}

;(function() {
var SPEC = {{ .Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
$('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>

+ 42
- 11
templates/repo/cloudbrain/new.tmpl View File

@@ -25,7 +25,7 @@
<div class="column">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-queue="{{.QueuesDetail}}" data-queue-start="{{.i18n.Tr "repo.wait_count_start"}}" data-queue-end="{{.i18n.Tr "repo.wait_count_end"}}"></div>
{{template "base/alert" .}}
<div class="ui negative message" id="messageInfo">
<div class="ui negative message" id="messageInfo" style="display:none;">
<p></p>
</div>
<form id="form_id" class="ui form" action="{{.Link}}" method="post">
@@ -55,8 +55,11 @@
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count" .}}
</div>
</div>
<div class="inline field">
<label></label>
{{template "custom/task_wait_count" .}}
</div>
<div class="inline required field">
<label>{{.i18n.Tr "cloudbrain.task_name"}}</label>
@@ -108,8 +111,8 @@
{{end}}
{{end}}
</select>
</div>
<div class="inline required field">
</div>
<!--<div class="inline required field">
<label>{{.i18n.Tr "cloudbrain.gpu_type"}}</label>
<select id="cloudbrain_gpu_type" class="ui search dropdown gpu-type" placeholder="选择GPU类型"
style='width:385px' name="gpu_type">
@@ -117,7 +120,7 @@
<option value="{{.Queue}}">{{.Value}}</option>
{{end}}
</select>
</div>
</div>-->

<div id="images-new-cb">

@@ -125,8 +128,8 @@
<div id="select-multi-dataset">

</div>
<div class="inline required field">
<!--<div class="inline required field">
<label>{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="cloudbrain_resource_spec" class="ui search dropdown"
placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" style='width:385px'
@@ -137,6 +140,14 @@
</option>
{{end}}
</select>
</div>-->
<div class="inline required field">
<label>{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="__specs__" class="ui search dropdown"
placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" style='width:385px' ovalue="{{.spec_id}}"
name="spec_id">
</select>
</div>

<div class="inline required field">
@@ -188,6 +199,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
let form = document.getElementById('form_id');
$('#messageInfo').css('display', 'none')
@@ -196,6 +208,7 @@
context.value = ''
$(".icon.icons").css("visibility", "hidden")
}
var isValidate = false;
function validate(){
$('.ui.form').form({
on: 'blur',
@@ -207,11 +220,17 @@
type: 'regExp[/^[a-z0-9][a-z0-9-_]{1,34}[a-z0-9-]$/]',
}
]
},
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function(){
onSuccess: function(){
isValidate = true;
},
onFailure: function(e){
isValidate = false;
return false;
}
})
@@ -219,6 +238,7 @@
validate();
let createFlag = false
form.onsubmit = function (e) {
if (!isValidate) return false;
if(createFlag) return false
let value_task = $("input[name='display_job_name']").val()
let value_image = $("input[name='image']").val()
@@ -286,7 +306,18 @@
}
selected_value = $("#cloudbrain_benchmark_category").val()
$('#store_category').attr("value", selected_value)
validate();
})

;(function() {
var SPECS = {{ .debug_specs }};
var showPoint = true;
window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 17
- 6
templates/repo/cloudbrain/show.tmpl View File

@@ -345,7 +345,7 @@
{{$.i18n.Tr "cloudbrain.gpu_type"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content resorce_type">
<div class="text-span text-span-w">
{{$.resource_type}}
</div>
@@ -400,10 +400,8 @@
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{$.i18n.Tr "cloudbrain.gpu_num"}}:{{$.GpuNum}},{{$.i18n.Tr "cloudbrain.cpu_num"}}:{{$.CpuNum}},{{$.i18n.Tr "cloudbrain.memory"}}(MB):{{$.MemMiB}},{{$.i18n.Tr "cloudbrain.shared_memory"}}(MB):{{$.ShareMemMiB}}
</div>
<td class="ti-text-form-content spec">
<div class="text-span text-span-w"></div>
</td>
</tr>
<tr class="ti-no-ng-animate">
@@ -560,7 +558,7 @@

</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
$('.menu .item').tab()
$(document).ready(function () {
@@ -601,4 +599,17 @@
}
document.getElementById("info_display").innerHTML = html;
}
;(function() {
var SPEC = {{ .Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
$('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>

+ 44
- 14
templates/repo/cloudbrain/trainjob/new.tmpl View File

@@ -14,7 +14,9 @@
.width {
width: 100% !important;
}

.width48 {
width: 48.5% !important;
}
.width80 {
width: 80.7% !important;
margin-left: 10px;
@@ -30,7 +32,7 @@
margin-left: 10.5rem !important;
align-items: center;
}
.width81 {
margin-left: 1.5rem !important;
width: 81% !important;
@@ -114,8 +116,11 @@
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $ "type" .train_gpu_types}}
<div style="display: flex;align-items: center;margin-left: 155px;margin-top: 0.5rem;">
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/code" "/dataset" "/model" | Safe}}</span>
</div>
@@ -171,7 +176,7 @@
<option name="job_type" value="TRAIN">TRAIN</option>
</select>
</div>
<div class="required min_title inline field">
<!--<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.gpu_type"}}</label>
<select id="cloudbrain_gpu_type" class="ui search width806 dropdown gpu-type" placeholder="选择GPU类型"
style='width:385px' name="gpu_type">
@@ -192,7 +197,7 @@
{{end}}
{{end}}
</select>
</div>
</div>-->
<div id="images-new-cb">

</div>
@@ -224,10 +229,10 @@
class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}">
</div>
</div>
<div class="required min_title inline field">
<!--<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="cloudbrain_resource_spec" class="ui search dropdown" placeholder="选择资源规格"
style='width:385px' name="resource_spec_id">
@@ -249,6 +254,13 @@
{{end}}
{{end}}
</select>
</div>-->

<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="__specs__" class="ui dropdown width48" placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" ovalue="{{.spec_id}}"
name="spec_id">
</select>
</div>

<div class="inline field" style="padding: 1rem 0;">
@@ -267,7 +279,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
let form = document.getElementById('form_id');
let createFlag = false
@@ -282,7 +294,7 @@
.tab();
$(document).keydown(function(event){
switch(event.keyCode){
case 13:return false;
case 13:return false;
}
});
$(document).ready(function(){
@@ -290,7 +302,7 @@
params&&params.parameter.forEach((item,index)=>{
Add_parameter(index,flag=true,item)
})
})
// 参数增加、删除、修改、保存
function Add_parameter(i,flag=false,paramsObject={}) {
@@ -342,7 +354,8 @@

$('select.dropdown')
.dropdown();

var isValidate = false;
function validate() {
$('.ui.form')
.form({
@@ -388,13 +401,19 @@
type: 'empty',
}
]
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function () {
// $('.ui.page.dimmer').dimmer('show')
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function (e) {
isValidate = false;
return false;
}
})
@@ -419,6 +438,17 @@
validate();
$('.ui.create_train_job.green.button').click(function (e) {
send_run_para();
validate();
validate();
})
</script>
;(function() {
var SPECS = {{ .train_specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 16
- 2
templates/repo/cloudbrain/trainjob/show.tmpl View File

@@ -359,7 +359,7 @@
{{$.i18n.Tr "repo.modelarts.train_job.resource_type"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content resorce_type">
<div class="text-span text-span-w">
{{$.resource_type}}
</div>
@@ -370,7 +370,7 @@
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content spec">
<div class="text-span text-span-w">
{{$.i18n.Tr "cloudbrain.gpu_num"}}:{{$.GpuNum}},{{$.i18n.Tr "cloudbrain.cpu_num"}}:{{$.CpuNum}},{{$.i18n.Tr "cloudbrain.memory"}}(MB):{{$.MemMiB}},{{$.i18n.Tr "cloudbrain.shared_memory"}}(MB):{{$.ShareMemMiB}}
</div>
@@ -677,6 +677,7 @@
{{template "base/footer" .}}
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js"></script>
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js"></script>
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<script>
var setting = {
@@ -987,4 +988,17 @@
document.getElementById("info_display").innerHTML = html;
}

;(function() {
var SPEC = {{ .Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
$('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>

+ 45
- 17
templates/repo/grampus/trainjob/gpu/new.tmpl View File

@@ -30,7 +30,9 @@
.width81{
width: 81% !important;
}

.width48{
width: 48.5% !important;
}
.add{font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
@@ -104,9 +106,12 @@
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $}}
<div style="display: flex;align-items: center;margin-left: 155px;margin-top: 0.5rem;">
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "/tmp/output" | Safe}}</span>
</div>
@@ -116,7 +121,7 @@
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
<span class="tooltips" style="margin-left: 11.5rem;display: block;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
</div>
<div class="min_title inline field">
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
@@ -128,7 +133,7 @@
<div class="ui divider"></div>

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>

<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
@@ -167,7 +172,7 @@
<a href="https://git.openi.org.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>

{{template "custom/select_dataset_train" .}}
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
@@ -175,11 +180,10 @@
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}">
</div>
</div>

<div class="required min_title inline field" id="flavor_name">
<!--<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width81" id="trainjob-flavor" style='width:385px' name="flavor">
{{if .flavor}}
@@ -199,8 +203,12 @@
{{end}}
{{end}}
</select>
</div>-->
<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}"></select>
</div>
<div class="inline min_title field">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button">
@@ -208,14 +216,15 @@
</button>
<a class="ui button" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>
<!-- 模态框 -->
</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<script>
let form = document.getElementById('form_id');
@@ -230,13 +239,13 @@
$('.menu .item')
.tab();

$(document).ready(function(){
let params = $('.dynamic.field').data('params')
params&&params.parameter.forEach((item,index)=>{
Add_parameter(index,flag=true,item)
})
})
// 参数增加、删除、修改、保存
function Add_parameter(i,flag=false,paramsObject={}) {
@@ -287,6 +296,7 @@
$('select.dropdown')
.dropdown();

var isValidate = false;
function validate(){
$('.ui.form')
.form({
@@ -333,13 +343,19 @@
type : 'integer[1..25]',
}
]
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function(){
// $('.ui.page.dimmer').dimmer('show')
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function(e){
onFailure: function(e){
isValidate = false;
return false;
}
})
@@ -374,4 +390,16 @@
send_run_para()
validate();
})
</script>

;(function() {
var SPECS = {{ .Specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 38
- 10
templates/repo/grampus/trainjob/npu/new.tmpl View File

@@ -25,7 +25,9 @@
.width81{
width: 81% !important;
}

.width48 {
width: 48.5% !important;
}
.add{font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
@@ -99,13 +101,15 @@
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $}}
<div style="display: flex;align-items: center;margin-left: 155px;margin-top: 0.5rem;">
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/cache/code" "/cache/dataset" "/cache/output" | Safe}}</span>
</div>
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
@@ -179,7 +183,7 @@
</span>
<a href="https://git.openi.org.cn/OpenIOSSG/MNIST_Example/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
{{template "custom/select_dataset_train" .}}
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
@@ -187,11 +191,11 @@
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}">
</div>
</div>

<div class="required min_title inline field" id="flavor_name">
<!--<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width81" id="trainjob-flavor" style='width:385px' name="flavor">
{{if .flavor}}
@@ -211,6 +215,10 @@
{{end}}
{{end}}
</select>
</div>-->
<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}"></select>
</div>
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>
@@ -242,6 +250,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<script>
let form = document.getElementById('form_id');
@@ -256,13 +265,13 @@
$('.menu .item')
.tab();

$(document).ready(function(){
let params = $('.dynamic.field').data('params')
params&&params.parameter.forEach((item,index)=>{
Add_parameter(index,flag=true,item)
})
})
// 参数增加、删除、修改、保存
function Add_parameter(i,flag=false,paramsObject={}) {
@@ -309,10 +318,11 @@
$(this).popup('show')
});

$('select.dropdown')
.dropdown();

var isValidate = false;
function validate(){
$('.ui.form')
.form({
@@ -350,13 +360,19 @@
type : 'integer[1..25]',
}
]
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function(){
// $('.ui.page.dimmer').dimmer('show')
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function(e){
isValidate = false;
return false;
}
})
@@ -395,4 +411,16 @@
send_run_para()
validate();
})

;(function() {
var SPECS = {{ .Specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 14
- 2
templates/repo/grampus/trainjob/show.tmpl View File

@@ -357,7 +357,7 @@
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content spec">
<div class="text-span text-span-w">
{{.FlavorName}}
</div>
@@ -634,8 +634,20 @@
{{template "base/footer" .}}
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js"></script>
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js"></script>
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
;(function() {
var SPEC = {{ .Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec').text(specStr);
})();
var setting = {
check: {
enable: true,


+ 35
- 8
templates/repo/modelarts/inferencejob/new.tmpl View File

@@ -83,8 +83,11 @@
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $}}
<div style="display: flex;align-items: center;margin-left: 155px;margin-top: 0.5rem;">
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.infer_dataset_path_rule" | Safe}}</span>
</div>
@@ -196,7 +199,7 @@
{{end}}
</select>
</div>
<!-- 数据集 -->
<div id="select-multi-dataset">

@@ -249,7 +252,7 @@
</select>
</div>
<!-- 规格 -->
<div class="required min_title inline field" id="flaver_name">
<!--<div class="required min_title inline field" id="flaver_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width80" id="trainjob-flavor" name="flavor">
{{if .flavor}}
@@ -269,6 +272,10 @@
{{end}}
{{end}}
</select>
</div>-->
<div class="required min_title inline field" id="flaver_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" name="spec_id" ovalue="{{.spec_id}}"></select>
</div>
<!-- 计算节点 -->
<div class="inline required min_title field">
@@ -292,7 +299,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
let form = document.getElementById('form_id');
let createFlag = false
@@ -305,7 +312,7 @@
let nameMap,nameList
$(".ui.button").attr('href',url_href)
// 获取模型列表和模型名称对应的模型版本
$(document).ready(function(){
modelVersion()
modelCkpt()
@@ -330,7 +337,7 @@
$('#select_model').removeClass("loading")
})
})
// 根据选中的模型名称获取相应的模型版本
function modelVersion(){
let faildModelVersion = $('#failed_model_version').val()
@@ -349,7 +356,7 @@
$("#select_model_version").removeClass("loading")
const initVersionText = $('#model_name_version div.item:first-child').text()
const initVersionValue = $('#model_name_version div.item:first-child').data('value')
if(faildModelVersion&&faildTrainUrl){
$("#select_model_version").dropdown('set text',faildModelVersion)
$("#select_model_version").dropdown('set value',faildTrainUrl,faildModelVersion,$('#model_name_version div.item:first-child'))
@@ -458,6 +465,8 @@
$("input#ai_engine_name").val(name1)
$("input#ai_flaver_name").val(name2)
}

var isValidate = false;
function validate(){
$('.ui.form')
.form({
@@ -511,12 +520,18 @@
type: 'empty',
}
]
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function(){
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function(e){
isValidate = false;
return false;
}
})
@@ -532,4 +547,16 @@
get_name()
validate();
})

;(function() {
var SPECS = {{ .Specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 15
- 2
templates/repo/modelarts/inferencejob/show.tmpl View File

@@ -423,7 +423,7 @@ td, th {
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content spec">
<div class="text-span text-span-w">
{{.FlavorName}}
</div>
@@ -522,6 +522,7 @@ td, th {
<!-- 确认模态框 -->
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
console.log('{{$.canDownload}}')
$(document).ready(function(){
@@ -538,5 +539,17 @@ $(document).ready(function(){
repoPath = urlArr.slice(-4)[0]
jobID = urlArr.slice(-1)[0]
})

;(function() {
var SPEC = {{ .Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
// $('td.ti-text-form-content.resorce_type').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>

+ 33
- 6
templates/repo/modelarts/notebook/new.tmpl View File

@@ -38,7 +38,10 @@
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count" .}}
</div>
<div class="inline field">
<label></label>
{{template "custom/task_wait_count" .}}
</div>
<div class="inline required field">
<label>{{.i18n.Tr "cloudbrain.task_name"}}</label>
@@ -65,7 +68,7 @@
<label>类型</label>
<input name="job_type" id="cloudbrain_job_type" value="{{.notebook_type}}" tabindex="3" disabled autofocus required maxlength="255" readonly="readonly">
</div> -->
<div class="inline required field">
<!--<div class="inline required field">
<label>{{.i18n.Tr "cloudbrain.specification"}}</label>
<select id="cloudbrain_flavor" class="ui search dropdown" placeholder="选择规格" style='width:385px' name="flavor">
{{range .flavors}}
@@ -73,6 +76,10 @@

{{end}}
</select>
</div>-->
<div class="inline required field">
<label>{{.i18n.Tr "cloudbrain.specification"}}</label>
<select id="__specs__" class="ui search dropdown" placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" style='width:385px' name="spec_id" ovalue="{{.spec_id}}"></select>
</div>
<!--<div class="inline required field">
<label>数据集存放路径</label>
@@ -95,13 +102,14 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
// 判断必填选项是否填写正确
let form = document.getElementById('form_id');

$('#messageInfo').css('display','none')

var isValidate = false;
function validate(){
$('.ui.form').form({
on: 'blur',
@@ -113,11 +121,17 @@
type: 'regExp[/^[a-z0-9][a-z0-9-_]{1,34}[a-z0-9-]$/]',
}
]
},
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function(){
onSuccess: function(){
isValidate = true;
},
onFailure: function(e){
isValidate = false;
return false;
}
})
@@ -125,7 +139,8 @@
validate();
let createFlag = false
form.onsubmit = function(e){
if(createFlag) return false
if(!isValidate) return false;
if(createFlag) return false;
let value_task = $("input[name='display_job_name']").val()
let re = /^[a-z0-9][a-z0-9-_]{1,34}[a-z0-9-]$/
let flag = re.test(value_task)
@@ -171,4 +186,16 @@
}
});
});

;(function() {
var SPECS = {{ .Specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 23
- 8
templates/repo/modelarts/notebook/show.tmpl View File

@@ -350,14 +350,14 @@

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn-image" style="cursor:pointer"
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn-image" style="cursor:pointer"
data-clipboard-text="{{.Image}}"
data-success="{{$.i18n.Tr "repo.copy_link_success"}}"
data-success="{{$.i18n.Tr "repo.copy_link_success"}}"
data-error="{{$.i18n.Tr "repo.copy_link_error"}}"
data-content="{{$.i18n.Tr "repo.copy_link"}}"
data-variation="inverted tiny"
>
<span title="{{.Image}}">{{.Image}}</span>
<span title="{{.Image}}">{{.Image}}</span>
</span>
</div>
</td>
@@ -367,7 +367,7 @@
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content spec">
<div class="text-span text-span-w">
{{$.resource_spec}}
</div>
@@ -427,7 +427,7 @@
</div>
</div>

</div>
<div style="clear:both">
<table style="border:none" class="ui fixed small stackable table">
@@ -437,7 +437,7 @@
<th style="color: #8a8e99;font-size:12px" class="two wide center aligned">{{$.i18n.Tr "dataset.download_oper"}}</th>
</tr></thead>
<tbody>
{{range $.datasetDownload}}
{{range $.datasetDownload}}
<tr>
<td style="word-wrap: break-word;word-break: break-all;">
@@ -488,7 +488,7 @@

</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
$('.menu .item').tab()

@@ -498,4 +498,19 @@
$(document).ready(function () {
$('.secondary.menu .item').tab();
});
</script>
console.log({{$.datasetDownload}})

;(function() {
var SPEC = {{ .Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
$('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>

+ 52
- 10
templates/repo/modelarts/trainjob/new.tmpl View File

@@ -88,7 +88,7 @@
</a>
</div>
</div>
<div class="required inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
@@ -109,9 +109,12 @@
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
Ascend NPU</a>
</div>
{{template "custom/wait_count_train" Dict "ctx" $}}
<div style="display: flex;align-items: center;margin-left: 155px;margin-top: 0.5rem;">
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.train_dataset_path_rule" | Safe}}</span>
</div>
@@ -203,7 +206,7 @@
</span>
<a href="https://git.openi.org.cn/OpenIOSSG/MINIST_Example" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
<div id="select-multi-dataset">

</div>
@@ -257,7 +260,7 @@
</div>
</div>
</div>
<div class="required inline min_title field" id="flaver_name">
<!--<div class="required inline min_title field" id="flaver_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="trainjob-flavor" name="flavor">
{{if .flavor}}
@@ -277,6 +280,10 @@
{{end}}
{{end}}
</select>
</div>-->
<div class="required inline min_title field" id="flaver_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" name="spec_id" ovalue="{{.spec_id}}"></select>
</div>
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>
@@ -287,8 +294,24 @@
id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1"
readonly>
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_id">
<select class="ui dropdown width" style='width: 100%;' name="work_server_id">
{{if .WorkNode}}
{{range .WorkNode}}

{{if $.work_server_number}}
{{if eq . $.work_server_number }}
<option name="server_id" selected value="{{.}}">{{.}}</option>
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{end}}

{{else}}
<option name="server_id" value="1">1</option>
{{end}}
</select>
</div>

@@ -310,7 +333,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
let form = document.getElementById('form_id');
let createFlag = false
@@ -327,7 +350,7 @@
.tab();
$(document).keydown(function(event){
switch(event.keyCode){
case 13:return false;
case 13:return false;
}
});
// 参数增加、删除、修改、保存
@@ -441,6 +464,7 @@
$('select.dropdown')
.dropdown();

var isValidate = false;
function validate() {
$('.ui.form')
.form({
@@ -478,13 +502,19 @@
type: 'integer[1..25]',
}
]
},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function () {
// $('.ui.page.dimmer').dimmer('show')
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function (e) {
isValidate = false;
return false;
}
})
@@ -523,4 +553,16 @@
send_run_para();
validate();
})
</script>

;(function() {
var SPECS = {{ .Specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 18
- 1
templates/repo/modelarts/trainjob/show.tmpl View File

@@ -1,5 +1,7 @@
{{template "base/head" .}}
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css" type="text/css">
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<style>
.according-panel-heading {
box-sizing: border-box;
@@ -395,11 +397,26 @@
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content">
<td class="ti-text-form-content spec{{$k}}">
<div class="text-span text-span-w">
{{.FlavorName}}
</div>
</td>
<script>
;(function() {
var SPEC = {{ .Spec }};
var showPoint = true;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
$('td.ti-text-form-content.spec{{$k}} div').text(specStr);
// $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>
</tr>
</tbody>


+ 30
- 53
templates/repo/modelarts/trainjob/version_new.tmpl View File

@@ -73,7 +73,6 @@
<input type="hidden" id="ai_engine_name" name="engine_names" value="">
<input type="hidden" id="ai_flaver_name" name="flaver_names" value="">
<input type="hidden" id="display_job_name" name="display_job_name" value="{{.display_job_name}}">
<h4 class="unite title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
@@ -108,12 +107,12 @@
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
Ascend NPU</a>
</div>
</div>
</div>
<div style="margin-top:-5px;">
{{template "custom/wait_count_train" Dict "ctx" $}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: -0.5rem;">
{{template "custom/task_wait_count" .}}
</div>
<div style="display: flex;align-items: center;margin-left: 155px;margin-top: 0.5rem;margin-bottom: 1.5rem;">
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;margin-bottom: 1.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.train_dataset_path_rule" | Safe}}</span>
</div>
@@ -249,7 +248,7 @@
</div>
</div>

<div class="required unite min_title inline field" id="flaver_name">
<!--<div class="required unite min_title inline field" id="flaver_name">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width81" id="trainjob-flavor" style='width:385px' name="flavor">
{{if .flavor_name}}
@@ -261,6 +260,10 @@
{{end}}
{{end}}
</select>
</div>-->
<div class="required unite min_title inline field" id="flaver_name">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select id="__specs__" class="ui dropdown width81" style='width:385px' name="spec_id" ovalue="{{.spec_id}}"></select>
</div>
<div class="inline required unite min_title field">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>
@@ -287,6 +290,7 @@
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<script>
let url_href = location.pathname.split('/create_version')[0]
@@ -427,53 +431,7 @@
$('select.dropdown')
.dropdown();

$('.ui.form')
.form({
on: 'blur',
inline:true,
fields: {
boot_file: {
identifier : 'boot_file',
rules: [
{
type: 'regExp[/.+\.py$/g]',
prompt : '启动文件必须为.py结尾'
}
]
},
job_name:{
identifier : 'job_name',
rules: [
{
type: 'regExp[/^[a-zA-Z0-9-_]{1,36}$/]',
prompt : '只包含大小写字母、数字、_和-,最长36个字符。'
}
]
},
display_job_name:{
identifier : 'display_job_name',
rules: [
{
type: 'regExp[/^[a-zA-Z0-9-_]{1,36}$/]',
prompt : '只包含大小写字母、数字、_和-,最长36个字符。'
}
]
},
attachment:{
identifier : 'attachment',
rules: [
{
type: 'empty',
prompt : '选择一个数据集'
}
]

},
},
})



var isValidate = false;
function validate(){
$('.ui.form')
.form({
@@ -517,12 +475,18 @@
]

},
spec_id: {
identifier: 'spec_id',
rules: [{ type: 'empty' }]
}
},
onSuccess: function(){
// $('.ui.page.dimmer').dimmer('show')
document.getElementById("mask").style.display = "block"
isValidate = true;
},
onFailure: function(e){
isValidate = false;
return false;
}
})
@@ -553,9 +517,22 @@

}

validate()
$('.ui.create_train_job.green.button').click(function(e) {
get_name()
send_run_para()
validate()
})

;(function() {
var SPECS = {{ .Specs }};
var showPoint = true;
renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
})();
</script>

+ 12
- 5
templates/user/dashboard/cloudbrains.tmpl View File

@@ -1,5 +1,6 @@
{{template "base/head" .}}
<!-- 提示框 -->
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<div class="alert"></div>
<div class="explore users">
<div class="cloudbrain_debug" style="display: none;" data-debug="{{$.i18n.Tr "repo.debug"}}"
@@ -159,11 +160,17 @@
</div>
<!-- XPU类型 -->
<div class="one wide column text center nowrap" style="width:10% !important;">
<span style="font-size: 12px;" title="{{.CardType}}">
{{if .CardType}}{{.CardType}}{{else}}--{{end}}
</span>
</div>

<span style="font-size: 12px;" title="" class="card_type_{{.DisplayJobName}}_{{$JobID}}"></span>
</div>
<script>
(function(){
var spec = {{.Spec}} || {};
var cardType = getListValueWithKey(ACC_CARD_TYPE, spec.AccCardType) || '--';
var spanEl = document.querySelector('.card_type_{{.DisplayJobName}}_{{$JobID}}');
spanEl.setAttribute('title', cardType);
spanEl.innerText = cardType;
})();
</script>
<!-- 项目 -->
<div class="two wide column text center nowrap" style="width: 11%!important;">
<a href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}"


+ 31
- 0
web_src/js/standalone/specsuse.js View File

@@ -0,0 +1,31 @@
// Accelerator card lookup table used by the spec renderers in this file:
// `k` is matched against a spec's AccCardType and `v` is the label that gets displayed.
// The entries are the same as ACC_CARD_TYPE in web_src/vuepages/const/index.js.
window.ACC_CARD_TYPE = [{ k: 'T4', v: 'T4' }, { k: 'A100', v: 'A100' }, { k: 'V100', v: 'V100' }, { k: 'ASCEND910', v: 'Ascend 910' }, { k: 'MLU270', v: 'MLU270' }, { k: 'RTX3080', v: 'RTX3080' }];

/**
 * Look up a value in a list of key/value records.
 *
 * @param {Array<Object>} list - records to search, in order.
 * @param {*} key - compared with strict equality against each record's key field.
 * @param {string} [k='k'] - name of the field that holds the key.
 * @param {string} [v='v'] - name of the field that holds the value to return.
 * @param {*} [defaultV=''] - returned when no record matches.
 * @returns {*} the value field of the first matching record, otherwise `defaultV`.
 */
window.getListValueWithKey = (list, key, k = 'k', v = 'v', defaultV = '') => {
  // find() stops at the first hit, exactly like the early return of a manual scan.
  const match = list.find((entry) => entry[k] === key);
  return match === undefined ? defaultV : match[v];
};

/**
 * Build the one-line, human-readable description of a resource specification.
 *
 * @param {Object} spec - specification record; reads ComputeResource, AccCardsNum,
 *   AccCardType, CpuCores, GPUMemGiB, MemGiB, ShareMemGiB and UnitPrice.
 * @param {boolean} showPoint - requested price display; currently overridden (see below).
 * @param {Object} langObj - translated labels: gpu_memory, memory, shared_memory, free, point_hr.
 * @returns {string} comma-separated summary, e.g. "<resource>: <n>*<card>, CPU: <cores>, ...".
 */
window.renderSpecStr = (spec, showPoint, langObj) => {
  // The price segment is switched off here regardless of what the caller passes.
  showPoint = false;

  const cardLabel = getListValueWithKey(ACC_CARD_TYPE, spec.AccCardType);
  const segments = [
    `${spec.ComputeResource}: ${spec.AccCardsNum + '*' + cardLabel}`,
    `CPU: ${spec.CpuCores}`,
  ];

  // GPU memory and shared memory are only mentioned when they are non-zero.
  if (spec.GPUMemGiB != 0) {
    segments.push(`${langObj.gpu_memory}: ${spec.GPUMemGiB}GB`);
  }
  segments.push(`${langObj.memory}: ${spec.MemGiB}GB`);
  if (spec.ShareMemGiB != 0) {
    segments.push(`${langObj.shared_memory}: ${spec.ShareMemGiB}GB`);
  }
  if (showPoint) {
    segments.push(`${spec.UnitPrice == 0 ? langObj.free : spec.UnitPrice + langObj.point_hr}`);
  }

  return segments.join(', ');
};

/**
 * Fill a specification <select> with one <option> per spec and restore the
 * previously chosen value.
 *
 * Options are built with jQuery's attr()/text() instead of an interpolated HTML
 * string, so a queue code or label containing quotes, `<` or `&` cannot break
 * the markup or inject elements.
 *
 * @param {jQuery} specsSel - jQuery-wrapped <select>; its `ovalue` attribute, when
 *   non-empty, names the option to select after rendering.
 * @param {Array<Object>} data - specification records (ID, QueueCode plus the fields
 *   read by window.renderSpecStr); a falsy value is treated as an empty list.
 * @param {boolean} showPoint - forwarded to window.renderSpecStr.
 * @param {Object} langObj - translated labels, forwarded to window.renderSpecStr.
 */
window.renderSpecsSelect = (specsSel, data, showPoint, langObj) => {
  specsSel.empty();
  const specs = data || [];
  const previousValue = specsSel.attr('ovalue');

  specs.forEach((spec) => {
    const label = window.renderSpecStr(spec, showPoint, langObj);
    const option = $('<option></option>')
      .attr({
        name: 'spec_id',
        // Stringify explicitly so the attributes are always present, as they
        // were when the markup was produced by string interpolation.
        value: `${spec.ID}`,
        queueCode: `${spec.QueueCode}`,
      })
      .text(label);
    specsSel.append(option);
  });

  // Re-select the submitted value (e.g. when the form is re-rendered after an error).
  if (previousValue) {
    specsSel.val(previousValue);
  }
};

+ 1
- 1
web_src/vuepages/const/index.js View File

@@ -10,7 +10,7 @@ export const JOB_TYPE = [{ k: 'DEBUG', v: i18n.t('debugTask') }, { k: 'TRAIN', v

// 资源管理
export const CLUSTERS = [{ k: 'OpenI', v: i18n.t('resourcesManagement.OpenI') }, { k: 'C2Net', v: i18n.t('resourcesManagement.C2Net') }];
export const AI_CENTER = [{ k: 'OpenIOne', v: i18n.t('resourcesManagement.OpenIOne') }, { k: 'OpenITwo', v: i18n.t('resourcesManagement.OpenITwo') }, { k: 'chendu', v: i18n.t('resourcesManagement.chenduCenter') }, { k: 'pclcci', v: i18n.t('resourcesManagement.pclcci') }, { k: 'hefei', v: i18n.t('resourcesManagement.hefeiCenter') }, { k: 'xuchang', v: i18n.t('resourcesManagement.xuchangCenter') }];
export const AI_CENTER = [{ k: 'OpenIOne', v: i18n.t('resourcesManagement.OpenIOne') }, { k: 'OpenITwo', v: i18n.t('resourcesManagement.OpenITwo') }, { k: 'OpenIChengdu', v: i18n.t('resourcesManagement.OpenIChengdu') }, { k: 'pclcci', v: i18n.t('resourcesManagement.pclcci') }, { k: 'hefei', v: i18n.t('resourcesManagement.hefeiCenter') }, { k: 'xuchang', v: i18n.t('resourcesManagement.xuchangCenter') }];
export const COMPUTER_RESOURCES = [{ k: 'GPU', v: 'GPU' }, { k: 'NPU', v: 'NPU' }, { k: 'MLU', v: 'MLU' }];
export const ACC_CARD_TYPE = [{ k: 'T4', v: 'T4' }, { k: 'A100', v: 'A100' }, { k: 'V100', v: 'V100' }, { k: 'ASCEND910', v: 'Ascend 910' }, { k: 'MLU270', v: 'MLU270' }, { k: 'RTX3080', v: 'RTX3080' }];
export const SPECIFICATION_STATUS = [{ k: '1', v: i18n.t('resourcesManagement.willOnShelf') }, { k: '2', v: i18n.t('resourcesManagement.onShelf') }, { k: '3', v: i18n.t('resourcesManagement.offShelf') }];

+ 8
- 2
web_src/vuepages/langs/config/en-US.js View File

@@ -82,7 +82,8 @@ const en = {
C2Net: 'C2Net',
OpenIOne: 'OpenI One',
OpenITwo: 'OpenI Two',
chenduCenter: 'ChenDu AI Center',
OpenIChengdu: 'OpenI ChengDu AI Chenter',
chengduCenter: 'ChengDu AI Center',
pclcci: 'PCL Cloud Computer Institute',
hefeiCenter: 'HeFei AI Center',
xuchangCenter: 'XuChang AI Center',
@@ -131,6 +132,7 @@ const en = {
onShelfConfirm: 'Are you sure to on shelf the resources specification?',
offShelfConfirm: 'Are you sure to off shelf the resources specification?',
onShelfCode1001: 'On shelf failed, the resources queues not available.',
onShelfCode1003: 'On shelf failed, the resources specification not available.',
offShelfDlgTip1: 'The resources specification has already used in scene:',
offShelfDlgTip2: 'Please confirm to off shelf?',
resSceneManagement: 'Resources Scene Management',
@@ -149,7 +151,11 @@ const en = {
computeCluster: 'Compute Cluster',
resourceSpecification: 'Resource Specification',
lastUpdateTime: 'Last Update Time',
resSceneDeleteConfirm: 'Are you sure to delete the current Resource Scene?',
resSceneDeleteConfirm: 'Are you sure to delete the current Resource Scene?',
resourceSpecificationIsAvailable: 'Specification Is Available',
resourceSpecificationIsAvailableAll: 'Specification Is Available(All)',
available: 'Available',
notAvailable: 'Not Available',
},
}



+ 7
- 1
web_src/vuepages/langs/config/zh-CN.js View File

@@ -82,7 +82,8 @@ const zh = {
C2Net: '智算集群',
OpenIOne: '云脑一',
OpenITwo: '云脑二',
chenduCenter: '成都人工智能计算中心',
OpenIChengdu: '启智成都智算',
chengduCenter: '成都智算',
pclcci: '鹏城云计算所',
hefeiCenter: '合肥类脑类脑智能开放平台',
xuchangCenter: '中原人工智能计算中心',
@@ -131,6 +132,7 @@ const zh = {
onShelfConfirm: '请确认上架该规格?',
offShelfConfirm: '请确认下架该规格?',
onShelfCode1001: '上架失败,资源池(队列)不可用。',
onShelfCode1003: '上架失败,资源规格不可用。',
offShelfDlgTip1: '当前资源规格已在以下场景中使用:',
offShelfDlgTip2: '请确认进行下架操作?',
resSceneManagement: '算力资源应用场景管理',
@@ -150,6 +152,10 @@ const zh = {
resourceSpecification: '资源规格',
lastUpdateTime: '最后更新时间',
resSceneDeleteConfirm: '是否确认删除当前应用场景?',
resourceSpecificationIsAvailable: '资源规格是否可用',
resourceSpecificationIsAvailableAll: '资源规格是否可用(全部)',
available: '可用',
notAvailable: '不可用',
},
}



+ 3
- 2
web_src/vuepages/pages/resources/components/QueueDialog.vue View File

@@ -10,7 +10,8 @@
<span>{{ $t('resourcesManagement.resQueueName') }}</span>
</div>
<div class="content">
<el-input v-model="dataInfo.QueueCode" placeholder="" :disabled="type === 'edit'" maxlength="255"></el-input>
<el-input v-model="dataInfo.QueueCode" placeholder="" :disabled="type === 'edit'" maxlength="255">
</el-input>
</div>
</div>
<div class="form-row">
@@ -101,7 +102,7 @@ export default {
return {
dialogShow: false,
clusterList: [CLUSTERS[0]],
computingCenterList: [AI_CENTER[0], AI_CENTER[1]],
computingCenterList: [AI_CENTER[0], AI_CENTER[1], AI_CENTER[2]],
computingTypeList: [...COMPUTER_RESOURCES],
cardTypeList: [...ACC_CARD_TYPE],



+ 1
- 1
web_src/vuepages/pages/resources/components/SceneDialog.vue View File

@@ -88,7 +88,7 @@
<script>
import BaseDialog from '~/components/BaseDialog.vue';
import { getResQueueCode, getResSpecificationList, addResScene, updateResScene } from '~/apis/modules/resources';
import { JOB_TYPE, CLUSTERS, AI_CENTER, ACC_CARD_TYPE, SPECIFICATION_STATUS } from '~/const';
import { JOB_TYPE, CLUSTERS, ACC_CARD_TYPE, SPECIFICATION_STATUS } from '~/const';
import { getListValueWithKey } from '~/utils';

export default {


+ 7
- 2
web_src/vuepages/pages/resources/components/SpecificationDialog.vue View File

@@ -20,8 +20,8 @@
<span>{{ $t('resourcesManagement.sourceSpecCode') }}</span>
</div>
<div class="content">
<el-input v-model="dataInfo.SourceSpecId" :placeholder="$t('resourcesManagement.sourceSpecCodeTips')" maxlength="255"
:disabled="type === 'edit'">
<el-input v-model="dataInfo.SourceSpecId" :placeholder="$t('resourcesManagement.sourceSpecCodeTips')"
maxlength="255" :disabled="type === 'edit'">
</el-input>
</div>
</div>
@@ -245,6 +245,11 @@ export default {
type: 'info',
message: this.$t('resourcesManagement.onShelfCode1001')
});
} else if (action === 'on-shelf' && res.Code === 1003) {
this.$message({
type: 'info',
message: this.$t('resourcesManagement.onShelfCode1003')
});
} else {
this.$message({
type: 'error',


+ 30
- 3
web_src/vuepages/pages/resources/specification/index.vue View File

@@ -9,6 +9,9 @@
<el-select class="select" size="medium" v-model="selStatus" @change="selectChange">
<el-option v-for="item in statusList" :key="item.k" :label="item.v" :value="item.k" />
</el-select>
<el-select class="select" size="medium" v-model="selAvailable" @change="selectChange">
<el-option v-for="item in availableList" :key="item.k" :label="item.v" :value="item.k" />
</el-select>
</div>
<div>
<el-button size="medium" icon="el-icon-refresh" @click="syncComputerNetwork" v-loading="syncLoading">
@@ -19,7 +22,7 @@
</div>
<div class="table-container">
<div style="min-height:600px;">
<el-table border :data="tableData" style="width: 100%" v-loading="loading" stripe>
<el-table border :data="tableData" style="width: 100%;min-width:1700px;" v-loading="loading" stripe>
<el-table-column prop="ID" label="ID" align="center" header-align="center" width="60"></el-table-column>
<el-table-column prop="SpecStr" :label="$t('resourcesManagement.resourceSpecification')" align="left"
header-align="center" min-width="160">
@@ -49,6 +52,14 @@
<span style="font-weight:600;font-size:14px;">{{ scope.row.UnitPrice }}</span>
</template>
</el-table-column>
<el-table-column prop="IsAvailableStr" :label="$t('resourcesManagement.resourceSpecificationIsAvailable')" align="center"
header-align="center" width="100">
<template slot-scope="scope">
<span :style="{ color: scope.row.IsAvailable ? 'rgb(82, 196, 26)' : 'rgb(245, 34, 45)' }">{{
scope.row.IsAvailableStr
}}</span>
</template>
</el-table-column>
<el-table-column prop="StatusStr" :label="$t('resourcesManagement.status')" align="center"
header-align="center" width="100">
<template slot-scope="scope">
@@ -60,7 +71,10 @@
<el-table-column :label="$t('operation')" align="center" header-align="center" width="100">
<template slot-scope="scope">
<span v-if="scope.row.Status == '1' && !scope.row.UnitPrice">
<span class="op-btn" @click="showDialog('edit', scope.row)">{{
<span v-if="scope.row.IsAvailable" class="op-btn" @click="showDialog('edit', scope.row)">{{
$t('resourcesManagement.toSetPriceAndOnShelf')
}}</span>
<span v-else class="op-btn" style="color: rgb(187, 187, 187); cursor: not-allowed;">{{
$t('resourcesManagement.toSetPriceAndOnShelf')
}}</span>
</span>
@@ -71,9 +85,12 @@
}}</span>
</span>
<span v-if="scope.row.Status == '3' || scope.row.Status == '1' && scope.row.UnitPrice">
<span class="op-btn" @click="onShelf(scope.row)">{{
<span v-if="scope.row.IsAvailable" class="op-btn" @click="onShelf(scope.row)">{{
$t('resourcesManagement.toOnShelf')
}}</span>
<span v-else class="op-btn" style="color: rgb(187, 187, 187); cursor: not-allowed;">{{
$t('resourcesManagement.toSetPriceAndOnShelf')
}}</span>
</span>
</template>
</el-table-column>
@@ -132,6 +149,8 @@ export default {
queueList: [{ k: '', v: this.$t('resourcesManagement.allResQueue') }],
selStatus: '',
statusList: [{ k: '', v: this.$t('resourcesManagement.allStatus') }, ...SPECIFICATION_STATUS],
selAvailable: '',
availableList: [{ k: '', v: this.$t('resourcesManagement.resourceSpecificationIsAvailableAll') }, { k: '1', v: this.$t('resourcesManagement.available') }, { k: '2', v: this.$t('resourcesManagement.notAvailable') }],
clusterList: [...CLUSTERS],
accCardTypeList: [...ACC_CARD_TYPE],
syncLoading: false,
@@ -178,6 +197,7 @@ export default {
const params = {
queue: this.selQueue,
status: this.selStatus,
available: this.selAvailable,
page: this.pageInfo.curpage,
pagesize: this.pageInfo.pageSize,
};
@@ -201,6 +221,8 @@ export default {
UpdatedTimeStr: formatDate(new Date(Spec.UpdatedTime * 1000), 'yyyy-MM-dd HH:mm:ss'),
Status: Spec.Status.toString(),
StatusStr: getListValueWithKey(this.statusList, Spec.Status.toString()),
IsAvailable: Spec.IsAvailable,
IsAvailableStr: Spec.IsAvailable ? this.$t('resourcesManagement.available') : this.$t('resourcesManagement.notAvailable'),
}
});
this.tableData = data;
@@ -280,6 +302,11 @@ export default {
type: 'info',
message: this.$t('resourcesManagement.onShelfCode1001')
});
} else if (type === 'on-shelf' && res.Code === 1003) {
this.$message({
type: 'info',
message: this.$t('resourcesManagement.onShelfCode1003')
});
} else {
this.$message({
type: 'error',


Loading…
Cancel
Save