You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

resource_specification.go 15 kB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago

  1. package models
  2. import (
  3. "code.gitea.io/gitea/modules/timeutil"
  4. "fmt"
  5. "xorm.io/builder"
  6. )
  7. const (
  8. SpecNotVerified int = iota + 1
  9. SpecOnShelf
  10. SpecOffShelf
  11. )
  12. type ResourceSpecification struct {
  13. ID int64 `xorm:"pk autoincr"`
  14. QueueId int64 `xorm:"INDEX"`
  15. SourceSpecId string `xorm:"INDEX"`
  16. AccCardsNum int
  17. CpuCores int
  18. MemGiB float32
  19. GPUMemGiB float32
  20. ShareMemGiB float32
  21. UnitPrice int
  22. Status int
  23. IsAvailable bool
  24. IsAutomaticSync bool
  25. CreatedTime timeutil.TimeStamp `xorm:"created"`
  26. CreatedBy int64
  27. UpdatedTime timeutil.TimeStamp `xorm:"updated"`
  28. UpdatedBy int64
  29. }
  30. func (r ResourceSpecification) ConvertToRes() *ResourceSpecificationRes {
  31. return &ResourceSpecificationRes{
  32. ID: r.ID,
  33. SourceSpecId: r.SourceSpecId,
  34. AccCardsNum: r.AccCardsNum,
  35. CpuCores: r.CpuCores,
  36. MemGiB: r.MemGiB,
  37. ShareMemGiB: r.ShareMemGiB,
  38. GPUMemGiB: r.GPUMemGiB,
  39. UnitPrice: r.UnitPrice,
  40. Status: r.Status,
  41. IsAvailable: r.IsAvailable,
  42. UpdatedTime: r.UpdatedTime,
  43. }
  44. }
  45. type ResourceSpecificationReq struct {
  46. QueueId int64 `binding:"Required"`
  47. SourceSpecId string
  48. AccCardsNum int
  49. CpuCores int
  50. MemGiB float32
  51. GPUMemGiB float32
  52. ShareMemGiB float32
  53. UnitPrice int
  54. Status int
  55. IsAutomaticSync bool
  56. CreatorId int64
  57. }
  58. func (r ResourceSpecificationReq) ToDTO() ResourceSpecification {
  59. return ResourceSpecification{
  60. QueueId: r.QueueId,
  61. SourceSpecId: r.SourceSpecId,
  62. AccCardsNum: r.AccCardsNum,
  63. CpuCores: r.CpuCores,
  64. MemGiB: r.MemGiB,
  65. GPUMemGiB: r.GPUMemGiB,
  66. ShareMemGiB: r.ShareMemGiB,
  67. UnitPrice: r.UnitPrice,
  68. Status: r.Status,
  69. IsAutomaticSync: r.IsAutomaticSync,
  70. CreatedBy: r.CreatorId,
  71. UpdatedBy: r.CreatorId,
  72. IsAvailable: true,
  73. }
  74. }
  75. type SearchResourceSpecificationOptions struct {
  76. ListOptions
  77. QueueId int64
  78. Status int
  79. Cluster string
  80. AvailableCode int
  81. }
  82. type SearchResourceBriefSpecificationOptions struct {
  83. QueueId int64
  84. Cluster string
  85. }
  86. type ResourceSpecAndQueueListRes struct {
  87. TotalSize int64
  88. List []*ResourceSpecAndQueueRes
  89. }
  90. func NewResourceSpecAndQueueListRes(totalSize int64, list []ResourceSpecAndQueue) *ResourceSpecAndQueueListRes {
  91. resList := make([]*ResourceSpecAndQueueRes, len(list))
  92. for i, v := range list {
  93. resList[i] = v.ConvertToRes()
  94. }
  95. return &ResourceSpecAndQueueListRes{
  96. TotalSize: totalSize,
  97. List: resList,
  98. }
  99. }
  100. type ResourceSpecificationRes struct {
  101. ID int64
  102. SourceSpecId string
  103. AccCardsNum int
  104. CpuCores int
  105. MemGiB float32
  106. GPUMemGiB float32
  107. ShareMemGiB float32
  108. UnitPrice int
  109. Status int
  110. IsAvailable bool
  111. UpdatedTime timeutil.TimeStamp
  112. }
  113. func (ResourceSpecificationRes) TableName() string {
  114. return "resource_specification"
  115. }
  116. type ResourceSpecAndQueueRes struct {
  117. Spec *ResourceSpecificationRes
  118. Queue *ResourceQueueRes
  119. }
  120. type ResourceSpecAndQueue struct {
  121. ResourceSpecification `xorm:"extends"`
  122. ResourceQueue `xorm:"extends"`
  123. }
  124. func (*ResourceSpecAndQueue) TableName() string {
  125. return "resource_specification"
  126. }
  127. func (r ResourceSpecAndQueue) ConvertToRes() *ResourceSpecAndQueueRes {
  128. return &ResourceSpecAndQueueRes{
  129. Spec: r.ResourceSpecification.ConvertToRes(),
  130. Queue: r.ResourceQueue.ConvertToRes(),
  131. }
  132. }
  133. type FindSpecsOptions struct {
  134. JobType JobType
  135. ComputeResource string
  136. Cluster string
  137. AiCenterCode string
  138. SpecId int64
  139. QueueCode string
  140. SourceSpecId string
  141. AccCardsNum int
  142. UseAccCardsNum bool
  143. AccCardType string
  144. CpuCores int
  145. UseCpuCores bool
  146. MemGiB float32
  147. UseMemGiB bool
  148. GPUMemGiB float32
  149. UseGPUMemGiB bool
  150. ShareMemGiB float32
  151. UseShareMemGiB bool
  152. //if true,find specs no matter used or not used in scene. if false,only find specs used in scene
  153. RequestAll bool
  154. }
  155. type Specification struct {
  156. ID int64
  157. SourceSpecId string
  158. AccCardsNum int
  159. AccCardType string
  160. CpuCores int
  161. MemGiB float32
  162. GPUMemGiB float32
  163. ShareMemGiB float32
  164. ComputeResource string
  165. UnitPrice int
  166. QueueId int64
  167. QueueCode string
  168. Cluster string
  169. AiCenterCode string
  170. AiCenterName string
  171. IsExclusive bool
  172. ExclusiveOrg string
  173. }
  174. func (Specification) TableName() string {
  175. return "resource_specification"
  176. }
  177. func InsertResourceSpecification(r ResourceSpecification) (int64, error) {
  178. return x.Insert(&r)
  179. }
  180. func UpdateResourceSpecificationById(queueId int64, spec ResourceSpecification) (int64, error) {
  181. return x.ID(queueId).Update(&spec)
  182. }
  183. func UpdateSpecUnitPriceById(id int64, unitPrice int) error {
  184. _, err := x.Exec("update resource_specification set unit_price = ? ,updated_time = ? where id = ?", unitPrice, timeutil.TimeStampNow(), id)
  185. return err
  186. }
  187. func SearchResourceSpecification(opts SearchResourceSpecificationOptions) (int64, []ResourceSpecAndQueue, error) {
  188. var cond = builder.NewCond()
  189. if opts.Page <= 0 {
  190. opts.Page = 1
  191. }
  192. if opts.QueueId > 0 {
  193. cond = cond.And(builder.Eq{"resource_specification.queue_id": opts.QueueId})
  194. }
  195. if opts.Status > 0 {
  196. cond = cond.And(builder.Eq{"resource_specification.status": opts.Status})
  197. }
  198. if opts.Cluster != "" {
  199. cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster})
  200. }
  201. if opts.AvailableCode == 1 {
  202. cond = cond.And(builder.Eq{"resource_specification.is_available": true})
  203. } else if opts.AvailableCode == 2 {
  204. cond = cond.And(builder.Eq{"resource_specification.is_available": false})
  205. }
  206. //cond = cond.And(builder.Or(builder.Eq{"resource_queue.deleted_time": 0}).Or(builder.IsNull{"resource_queue.deleted_time"}))
  207. n, err := x.Where(cond).Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id").
  208. Unscoped().Count(&ResourceSpecAndQueue{})
  209. if err != nil {
  210. return 0, nil, err
  211. }
  212. r := make([]ResourceSpecAndQueue, 0)
  213. err = x.Where(cond).
  214. Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id").
  215. Desc("resource_specification.id").
  216. Limit(opts.PageSize, (opts.Page-1)*opts.PageSize).
  217. Unscoped().Find(&r)
  218. if err != nil {
  219. return 0, nil, err
  220. }
  221. return n, r, nil
  222. }
  223. func GetSpecScenes(specId int64) ([]ResourceSceneBriefRes, error) {
  224. r := make([]ResourceSceneBriefRes, 0)
  225. err := x.Where("resource_scene_spec.spec_id = ?", specId).
  226. Join("INNER", "resource_scene_spec", "resource_scene_spec.scene_id = resource_scene.id").
  227. Find(&r)
  228. if err != nil {
  229. return nil, err
  230. }
  231. return r, nil
  232. }
  233. func ResourceSpecOnShelf(id int64, unitPrice int) error {
  234. _, err := x.Exec("update resource_specification set unit_price = ?,updated_time = ?,status = ? where id = ?", unitPrice, timeutil.TimeStampNow(), SpecOnShelf, id)
  235. return err
  236. }
  237. func ResourceSpecOffShelf(id int64) (int64, error) {
  238. sess := x.NewSession()
  239. var err error
  240. defer func() {
  241. if err != nil {
  242. sess.Rollback()
  243. }
  244. sess.Close()
  245. }()
  246. //delete scene spec relation
  247. if _, err = sess.Where("spec_id = ?", id).Delete(&ResourceSceneSpec{}); err != nil {
  248. return 0, err
  249. }
  250. param := ResourceSpecification{
  251. Status: SpecOffShelf,
  252. }
  253. n, err := sess.Where("id = ? and status = ?", id, SpecOnShelf).Update(&param)
  254. if err != nil {
  255. return 0, err
  256. }
  257. sess.Commit()
  258. return n, err
  259. }
  260. func GetResourceSpecification(r *ResourceSpecification) (*ResourceSpecification, error) {
  261. has, err := x.Get(r)
  262. if err != nil {
  263. return nil, err
  264. } else if !has {
  265. return nil, nil
  266. }
  267. return r, nil
  268. }
  269. func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceSpecification, existIds []int64) error {
  270. sess := x.NewSession()
  271. var err error
  272. defer func() {
  273. if err != nil {
  274. sess.Rollback()
  275. }
  276. sess.Close()
  277. }()
  278. //delete specs and scene that no longer exists
  279. deleteIds := make([]int64, 0)
  280. cond := builder.NewCond()
  281. cond = cond.And(builder.NotIn("resource_specification.id", existIds)).And(builder.Eq{"resource_queue.cluster": C2NetCluster})
  282. if err := sess.Cols("resource_specification.id").Table("resource_specification").
  283. Where(cond).Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id").
  284. Find(&deleteIds); err != nil {
  285. return err
  286. }
  287. if len(deleteIds) > 0 {
  288. if _, err = sess.Cols("status", "is_available").In("id", deleteIds).Update(&ResourceSpecification{Status: SpecOffShelf, IsAvailable: false}); err != nil {
  289. return err
  290. }
  291. if _, err = sess.In("spec_id", deleteIds).Delete(&ResourceSceneSpec{}); err != nil {
  292. return err
  293. }
  294. }
  295. //update exists specs
  296. if len(updateList) > 0 {
  297. for _, v := range updateList {
  298. if _, err = sess.ID(v.ID).UseBool("is_available").Update(&v); err != nil {
  299. return err
  300. }
  301. }
  302. }
  303. //insert new specs
  304. if len(insertList) > 0 {
  305. if _, err = sess.Insert(insertList); err != nil {
  306. return err
  307. }
  308. }
  309. return sess.Commit()
  310. }
  311. //FindSpecs
  312. func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) {
  313. var cond = builder.NewCond()
  314. if !opts.RequestAll && opts.JobType != "" {
  315. cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType})
  316. }
  317. if opts.ComputeResource != "" {
  318. cond = cond.And(builder.Eq{"resource_queue.compute_resource": opts.ComputeResource})
  319. }
  320. if opts.Cluster != "" {
  321. cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster})
  322. }
  323. if opts.AiCenterCode != "" {
  324. cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode})
  325. }
  326. if opts.SpecId > 0 {
  327. cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId})
  328. }
  329. if opts.QueueCode != "" {
  330. cond = cond.And(builder.Eq{"resource_queue.queue_code": opts.QueueCode})
  331. }
  332. if opts.SourceSpecId != "" {
  333. cond = cond.And(builder.Eq{"resource_specification.source_spec_id": opts.SourceSpecId})
  334. }
  335. if opts.UseAccCardsNum {
  336. cond = cond.And(builder.Eq{"resource_specification.acc_cards_num": opts.AccCardsNum})
  337. }
  338. if opts.AccCardType != "" {
  339. cond = cond.And(builder.Eq{"resource_queue.acc_card_type": opts.AccCardType})
  340. }
  341. if opts.UseCpuCores {
  342. cond = cond.And(builder.Eq{"resource_specification.cpu_cores": opts.CpuCores})
  343. }
  344. if opts.UseMemGiB {
  345. cond = cond.And(builder.Eq{"resource_specification.mem_gi_b": opts.MemGiB})
  346. }
  347. if opts.UseGPUMemGiB {
  348. cond = cond.And(builder.Eq{"resource_specification.gpu_mem_gi_b": opts.GPUMemGiB})
  349. }
  350. if opts.UseShareMemGiB {
  351. cond = cond.And(builder.Eq{"resource_specification.share_mem_gi_b": opts.ShareMemGiB})
  352. }
  353. r := make([]*Specification, 0)
  354. s := x.Where(cond).
  355. Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id")
  356. if !opts.RequestAll {
  357. s = s.Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id").
  358. Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id")
  359. }
  360. err := s.OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc,resource_specification.cpu_cores asc,resource_specification.mem_gi_b asc,resource_specification.share_mem_gi_b asc").
  361. Unscoped().Find(&r)
  362. if err != nil {
  363. return nil, err
  364. }
  365. return r, nil
  366. }
  367. func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specification, error) {
  368. sess := x.NewSession()
  369. defer sess.Close()
  370. sess.Begin()
  371. param := ResourceQueue{
  372. QueueCode: queue.QueueCode,
  373. Cluster: queue.Cluster,
  374. AiCenterCode: queue.AiCenterCode,
  375. ComputeResource: queue.ComputeResource,
  376. AccCardType: queue.AccCardType,
  377. }
  378. _, err := sess.Get(&param)
  379. if err != nil {
  380. sess.Rollback()
  381. return nil, err
  382. }
  383. if param.ID == 0 {
  384. _, err = sess.InsertOne(&queue)
  385. if err != nil {
  386. sess.Rollback()
  387. return nil, err
  388. }
  389. } else {
  390. queue = param
  391. }
  392. spec.QueueId = queue.ID
  393. _, err = sess.InsertOne(&spec)
  394. if err != nil {
  395. sess.Rollback()
  396. return nil, err
  397. }
  398. sess.Commit()
  399. return BuildSpecification(queue, spec), nil
  400. }
  401. func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification {
  402. return &Specification{
  403. ID: spec.ID,
  404. SourceSpecId: spec.SourceSpecId,
  405. AccCardsNum: spec.AccCardsNum,
  406. AccCardType: queue.AccCardType,
  407. CpuCores: spec.CpuCores,
  408. MemGiB: spec.MemGiB,
  409. GPUMemGiB: spec.GPUMemGiB,
  410. ShareMemGiB: spec.ShareMemGiB,
  411. ComputeResource: queue.ComputeResource,
  412. UnitPrice: spec.UnitPrice,
  413. QueueId: queue.ID,
  414. QueueCode: queue.QueueCode,
  415. Cluster: queue.Cluster,
  416. AiCenterCode: queue.AiCenterCode,
  417. AiCenterName: queue.AiCenterName,
  418. }
  419. }
  420. func GetCloudbrainOneAccCardType(queueCode string) string {
  421. switch queueCode {
  422. case "a100":
  423. return "A100"
  424. case "openidebug":
  425. return "T4"
  426. case "openidgx":
  427. return "V100"
  428. }
  429. return ""
  430. }
  431. var cloudbrainTwoSpecsInitFlag = false
  432. var cloudbrainTwoSpecs map[string]*Specification
  433. func GetCloudbrainTwoSpecs() (map[string]*Specification, error) {
  434. if !cloudbrainTwoSpecsInitFlag {
  435. r, err := InitCloudbrainTwoSpecs()
  436. if err != nil {
  437. return nil, err
  438. }
  439. cloudbrainTwoSpecsInitFlag = true
  440. cloudbrainTwoSpecs = r
  441. }
  442. return cloudbrainTwoSpecs, nil
  443. }
  444. func InitCloudbrainTwoSpecs() (map[string]*Specification, error) {
  445. r := make(map[string]*Specification, 0)
  446. queue, err := GetResourceQueue(&ResourceQueue{QueueCode: "openisupport"})
  447. if err != nil {
  448. return nil, err
  449. }
  450. if queue == nil {
  451. queue = &ResourceQueue{
  452. QueueCode: "openisupport",
  453. Cluster: OpenICluster,
  454. AiCenterCode: AICenterOfCloudBrainTwo,
  455. AiCenterName: "云脑二",
  456. ComputeResource: NPU,
  457. AccCardType: "ASCEND910",
  458. Remark: "处理历史云脑任务时自动生成",
  459. }
  460. _, err = x.InsertOne(queue)
  461. if err != nil {
  462. return nil, err
  463. }
  464. }
  465. for i := 1; i <= 8; i = i * 2 {
  466. sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(i)
  467. spec, err := GetResourceSpecification(&ResourceSpecification{
  468. SourceSpecId: sourceSpecId,
  469. QueueId: queue.ID,
  470. })
  471. if err != nil {
  472. return nil, err
  473. }
  474. if spec == nil {
  475. spec = &ResourceSpecification{
  476. QueueId: queue.ID,
  477. SourceSpecId: sourceSpecId,
  478. AccCardsNum: i,
  479. CpuCores: i * 24,
  480. MemGiB: float32(i * 256),
  481. GPUMemGiB: float32(32),
  482. Status: SpecOffShelf,
  483. IsAvailable: true,
  484. }
  485. _, err = x.Insert(spec)
  486. if err != nil {
  487. return nil, err
  488. }
  489. }
  490. r[sourceSpecId] = BuildSpecification(*queue, *spec)
  491. }
  492. return r, nil
  493. }
  494. var grampusSpecsInitFlag = false
  495. var grampusSpecs map[string]*Specification
  496. func GetGrampusSpecs() (map[string]*Specification, error) {
  497. if !grampusSpecsInitFlag {
  498. specMap := make(map[string]*Specification, 0)
  499. r, err := FindSpecs(FindSpecsOptions{
  500. Cluster: C2NetCluster,
  501. RequestAll: true,
  502. })
  503. if err != nil {
  504. return nil, err
  505. }
  506. for _, spec := range r {
  507. specMap[spec.SourceSpecId] = spec
  508. specMap[spec.SourceSpecId+"_"+spec.AiCenterCode] = spec
  509. }
  510. grampusSpecsInitFlag = true
  511. grampusSpecs = specMap
  512. }
  513. return grampusSpecs, nil
  514. }