You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

modelarts.go 19 kB

4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. package modelarts
  2. import (
  3. "code.gitea.io/gitea/modules/timeutil"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "path"
  8. "strconv"
  9. "code.gitea.io/gitea/models"
  10. "code.gitea.io/gitea/modules/context"
  11. "code.gitea.io/gitea/modules/log"
  12. "code.gitea.io/gitea/modules/notification"
  13. "code.gitea.io/gitea/modules/setting"
  14. "code.gitea.io/gitea/modules/storage"
  15. )
  16. const (
  17. //notebook
  18. storageTypeOBS = "obs"
  19. autoStopDuration = 4 * 60 * 60
  20. autoStopDurationMs = 4 * 60 * 60 * 1000
  21. DataSetMountPath = "/home/ma-user/work"
  22. NotebookEnv = "Python3"
  23. NotebookType = "Ascend"
  24. FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"
  25. //train-job
  26. // ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
  27. // Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}"
  28. // EngineVersions = "{\"version\":[{\"id\":118,\"value\":\"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64\"}," +
  29. // "{\"id\":119,\"value\":\"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64\"}," +
  30. // "{\"id\":120,\"value\":\"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64\"}," +
  31. // "{\"id\":117,\"value\":\"TF-1.15-c75-python3.7-euleros2.8-aarch64\"}" +
  32. // "]}"
  33. // TrainJobFlavorInfo = "{\"flavor\":[{\"code\":\"modelarts.bm.910.arm.public.2\",\"value\":\"Ascend : 2 * Ascend 910 CPU:48 核 512GiB\"}," +
  34. // "{\"code\":\"modelarts.bm.910.arm.public.8\",\"value\":\"Ascend : 8 * Ascend 910 CPU:192 核 2048GiB\"}," +
  35. // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," +
  36. // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" +
  37. // "]}"
  38. CodePath = "/code/"
  39. OutputPath = "/output/"
  40. ResultPath = "/result/"
  41. LogPath = "/log/"
  42. JobPath = "/job/"
  43. OrderDesc = "desc" //向下查询
  44. OrderAsc = "asc" //向上查询
  45. Lines = 500
  46. TrainUrl = "train_url"
  47. DataUrl = "data_url"
  48. ResultUrl = "result_url"
  49. CkptUrl = "ckpt_url"
  50. DeviceTarget = "device_target"
  51. Ascend = "Ascend"
  52. PerPage = 10
  53. IsLatestVersion = "1"
  54. NotLatestVersion = "0"
  55. VersionCount = 1
  56. SortByCreateTime = "create_time"
  57. ConfigTypeCustom = "custom"
  58. TotalVersionCount = 1
  59. )
  60. var (
  61. poolInfos *models.PoolInfos
  62. FlavorInfos *models.FlavorInfos
  63. ImageInfos *models.ImageInfosModelArts
  64. )
  65. type GenerateTrainJobReq struct {
  66. JobName string
  67. DisplayJobName string
  68. Uuid string
  69. Description string
  70. CodeObsPath string
  71. BootFile string
  72. BootFileUrl string
  73. DataUrl string
  74. TrainUrl string
  75. FlavorCode string
  76. LogUrl string
  77. PoolID string
  78. WorkServerNumber int
  79. EngineID int64
  80. Parameters []models.Parameter
  81. CommitID string
  82. IsLatestVersion string
  83. Params string
  84. BranchName string
  85. PreVersionId int64
  86. PreVersionName string
  87. FlavorName string
  88. VersionCount int
  89. EngineName string
  90. TotalVersionCount int
  91. }
  92. type GenerateInferenceJobReq struct {
  93. JobName string
  94. DisplayJobName string
  95. Uuid string
  96. Description string
  97. CodeObsPath string
  98. BootFile string
  99. BootFileUrl string
  100. DataUrl string
  101. TrainUrl string
  102. FlavorCode string
  103. LogUrl string
  104. PoolID string
  105. WorkServerNumber int
  106. EngineID int64
  107. Parameters []models.Parameter
  108. CommitID string
  109. Params string
  110. BranchName string
  111. FlavorName string
  112. EngineName string
  113. LabelName string
  114. IsLatestVersion string
  115. VersionCount int
  116. TotalVersionCount int
  117. ModelName string
  118. ModelVersion string
  119. CkptName string
  120. ResultUrl string
  121. }
  122. type VersionInfo struct {
  123. Version []struct {
  124. ID int `json:"id"`
  125. Value string `json:"value"`
  126. } `json:"version"`
  127. }
  128. type Flavor struct {
  129. Info []struct {
  130. Code string `json:"code"`
  131. Value string `json:"value"`
  132. } `json:"flavor"`
  133. }
  134. type Engine struct {
  135. Info []struct {
  136. ID int `json:"id"`
  137. Value string `json:"value"`
  138. } `json:"engine"`
  139. }
  140. type ResourcePool struct {
  141. Info []struct {
  142. ID string `json:"id"`
  143. Value string `json:"value"`
  144. } `json:"resource_pool"`
  145. }
  146. // type Parameter struct {
  147. // Label string `json:"label"`
  148. // Value string `json:"value"`
  149. // }
  150. // type Parameters struct {
  151. // Parameter []Parameter `json:"parameter"`
  152. // }
  153. type Parameters struct {
  154. Parameter []struct {
  155. Label string `json:"label"`
  156. Value string `json:"value"`
  157. } `json:"parameter"`
  158. }
  159. func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor string) error {
  160. var dataActualPath string
  161. if uuid != "" {
  162. dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  163. } else {
  164. userPath := setting.UserBasePath + ctx.User.Name + "/"
  165. isExist, err := storage.ObsHasObject(userPath)
  166. if err != nil {
  167. log.Error("ObsHasObject failed:%v", err.Error(), ctx.Data["MsgID"])
  168. return err
  169. }
  170. if !isExist {
  171. if err = storage.ObsCreateObject(userPath); err != nil {
  172. log.Error("ObsCreateObject failed:%v", err.Error(), ctx.Data["MsgID"])
  173. return err
  174. }
  175. }
  176. dataActualPath = setting.Bucket + "/" + userPath
  177. }
  178. if poolInfos == nil {
  179. json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
  180. }
  181. createTime := timeutil.TimeStampNow()
  182. jobResult, err := CreateJob(models.CreateNotebookParams{
  183. JobName: jobName,
  184. Description: description,
  185. ProfileID: setting.ProfileID,
  186. Flavor: flavor,
  187. Pool: models.Pool{
  188. ID: poolInfos.PoolInfo[0].PoolId,
  189. Name: poolInfos.PoolInfo[0].PoolName,
  190. Type: poolInfos.PoolInfo[0].PoolType,
  191. },
  192. Spec: models.Spec{
  193. Storage: models.Storage{
  194. Type: storageTypeOBS,
  195. Location: models.Location{
  196. Path: dataActualPath,
  197. },
  198. },
  199. AutoStop: models.AutoStop{
  200. Enable: true,
  201. Duration: autoStopDuration,
  202. },
  203. },
  204. })
  205. if err != nil {
  206. log.Error("CreateJob failed: %v", err.Error())
  207. return err
  208. }
  209. err = models.CreateCloudbrain(&models.Cloudbrain{
  210. Status: string(models.JobWaiting),
  211. UserID: ctx.User.ID,
  212. RepoID: ctx.Repo.Repository.ID,
  213. JobID: jobResult.ID,
  214. JobName: jobName,
  215. JobType: string(models.JobTypeDebug),
  216. Type: models.TypeCloudBrainTwo,
  217. Uuid: uuid,
  218. ComputeResource: models.NPUResource,
  219. CreatedUnix: createTime,
  220. UpdatedUnix: createTime,
  221. })
  222. if err != nil {
  223. return err
  224. }
  225. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobResult.ID, jobName, models.ActionCreateDebugNPUTask)
  226. return nil
  227. }
  228. func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error {
  229. if poolInfos == nil {
  230. json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
  231. }
  232. imageName, err := GetNotebookImageName(imageId)
  233. if err != nil {
  234. log.Error("GetNotebookImageName failed: %v", err.Error())
  235. return err
  236. }
  237. createTime := timeutil.TimeStampNow()
  238. jobResult, err := createNotebook2(models.CreateNotebook2Params{
  239. JobName: jobName,
  240. Description: description,
  241. Flavor: flavor,
  242. Duration: autoStopDurationMs,
  243. ImageID: imageId,
  244. PoolID: poolInfos.PoolInfo[0].PoolId,
  245. Feature: models.NotebookFeature,
  246. Volume: models.VolumeReq{
  247. Capacity: setting.Capacity,
  248. Category: models.EVSCategory,
  249. Ownership: models.ManagedOwnership,
  250. },
  251. WorkspaceID: "0",
  252. })
  253. if err != nil {
  254. log.Error("createNotebook2 failed: %v", err.Error())
  255. return err
  256. }
  257. err = models.CreateCloudbrain(&models.Cloudbrain{
  258. Status: jobResult.Status,
  259. UserID: ctx.User.ID,
  260. RepoID: ctx.Repo.Repository.ID,
  261. JobID: jobResult.ID,
  262. JobName: jobName,
  263. FlavorCode: flavor,
  264. DisplayJobName: displayJobName,
  265. JobType: string(models.JobTypeDebug),
  266. Type: models.TypeCloudBrainTwo,
  267. Uuid: uuid,
  268. ComputeResource: models.NPUResource,
  269. Image: imageName,
  270. Description: description,
  271. CreatedUnix: createTime,
  272. UpdatedUnix: createTime,
  273. })
  274. if err != nil {
  275. return err
  276. }
  277. task, err := models.GetCloudbrainByName(jobName)
  278. if err != nil {
  279. log.Error("GetCloudbrainByName failed: %v", err.Error())
  280. return err
  281. }
  282. stringId := strconv.FormatInt(task.ID, 10)
  283. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask)
  284. return nil
  285. }
  286. func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
  287. createTime := timeutil.TimeStampNow()
  288. jobResult, err := createTrainJob(models.CreateTrainJobParams{
  289. JobName: req.JobName,
  290. Description: req.Description,
  291. Config: models.Config{
  292. WorkServerNum: req.WorkServerNumber,
  293. AppUrl: req.CodeObsPath,
  294. BootFileUrl: req.BootFileUrl,
  295. DataUrl: req.DataUrl,
  296. EngineID: req.EngineID,
  297. TrainUrl: req.TrainUrl,
  298. LogUrl: req.LogUrl,
  299. PoolID: req.PoolID,
  300. CreateVersion: true,
  301. Flavor: models.Flavor{
  302. Code: req.FlavorCode,
  303. },
  304. Parameter: req.Parameters,
  305. },
  306. })
  307. if err != nil {
  308. log.Error("CreateJob failed: %v", err.Error())
  309. return err
  310. }
  311. attach, err := models.GetAttachmentByUUID(req.Uuid)
  312. if err != nil {
  313. log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
  314. return err
  315. }
  316. jobId := strconv.FormatInt(jobResult.JobID, 10)
  317. err = models.CreateCloudbrain(&models.Cloudbrain{
  318. Status: TransTrainJobStatus(jobResult.Status),
  319. UserID: ctx.User.ID,
  320. RepoID: ctx.Repo.Repository.ID,
  321. JobID: jobId,
  322. JobName: req.JobName,
  323. DisplayJobName: req.DisplayJobName,
  324. JobType: string(models.JobTypeTrain),
  325. Type: models.TypeCloudBrainTwo,
  326. VersionID: jobResult.VersionID,
  327. VersionName: jobResult.VersionName,
  328. Uuid: req.Uuid,
  329. DatasetName: attach.Name,
  330. CommitID: req.CommitID,
  331. IsLatestVersion: req.IsLatestVersion,
  332. ComputeResource: models.NPUResource,
  333. EngineID: req.EngineID,
  334. TrainUrl: req.TrainUrl,
  335. BranchName: req.BranchName,
  336. Parameters: req.Params,
  337. BootFile: req.BootFile,
  338. DataUrl: req.DataUrl,
  339. LogUrl: req.LogUrl,
  340. FlavorCode: req.FlavorCode,
  341. Description: req.Description,
  342. WorkServerNumber: req.WorkServerNumber,
  343. FlavorName: req.FlavorName,
  344. EngineName: req.EngineName,
  345. VersionCount: req.VersionCount,
  346. TotalVersionCount: req.TotalVersionCount,
  347. CreatedUnix: createTime,
  348. UpdatedUnix: createTime,
  349. })
  350. if err != nil {
  351. log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
  352. return err
  353. }
  354. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask)
  355. return nil
  356. }
  357. func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
  358. createTime := timeutil.TimeStampNow()
  359. jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{
  360. Description: req.Description,
  361. Config: models.TrainJobVersionConfig{
  362. WorkServerNum: req.WorkServerNumber,
  363. AppUrl: req.CodeObsPath,
  364. BootFileUrl: req.BootFileUrl,
  365. DataUrl: req.DataUrl,
  366. EngineID: req.EngineID,
  367. TrainUrl: req.TrainUrl,
  368. LogUrl: req.LogUrl,
  369. PoolID: req.PoolID,
  370. Flavor: models.Flavor{
  371. Code: req.FlavorCode,
  372. },
  373. Parameter: req.Parameters,
  374. PreVersionId: req.PreVersionId,
  375. },
  376. }, jobId)
  377. if err != nil {
  378. log.Error("CreateJob failed: %v", err.Error())
  379. return err
  380. }
  381. attach, err := models.GetAttachmentByUUID(req.Uuid)
  382. if err != nil {
  383. log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
  384. return err
  385. }
  386. var jobTypes []string
  387. jobTypes = append(jobTypes, string(models.JobTypeTrain))
  388. repo := ctx.Repo.Repository
  389. VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
  390. RepoID: repo.ID,
  391. Type: models.TypeCloudBrainTwo,
  392. JobTypes: jobTypes,
  393. JobID: strconv.FormatInt(jobResult.JobID, 10),
  394. })
  395. if err != nil {
  396. ctx.ServerError("Cloudbrain", err)
  397. return err
  398. }
  399. //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount
  400. err = models.CreateCloudbrain(&models.Cloudbrain{
  401. Status: TransTrainJobStatus(jobResult.Status),
  402. UserID: ctx.User.ID,
  403. RepoID: ctx.Repo.Repository.ID,
  404. JobID: strconv.FormatInt(jobResult.JobID, 10),
  405. JobName: req.JobName,
  406. DisplayJobName: req.DisplayJobName,
  407. JobType: string(models.JobTypeTrain),
  408. Type: models.TypeCloudBrainTwo,
  409. VersionID: jobResult.VersionID,
  410. VersionName: jobResult.VersionName,
  411. Uuid: req.Uuid,
  412. DatasetName: attach.Name,
  413. CommitID: req.CommitID,
  414. IsLatestVersion: req.IsLatestVersion,
  415. PreVersionName: req.PreVersionName,
  416. ComputeResource: models.NPUResource,
  417. EngineID: req.EngineID,
  418. TrainUrl: req.TrainUrl,
  419. BranchName: req.BranchName,
  420. Parameters: req.Params,
  421. BootFile: req.BootFile,
  422. DataUrl: req.DataUrl,
  423. LogUrl: req.LogUrl,
  424. PreVersionId: req.PreVersionId,
  425. FlavorCode: req.FlavorCode,
  426. Description: req.Description,
  427. WorkServerNumber: req.WorkServerNumber,
  428. FlavorName: req.FlavorName,
  429. EngineName: req.EngineName,
  430. TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1,
  431. VersionCount: VersionListCount + 1,
  432. CreatedUnix: createTime,
  433. UpdatedUnix: createTime,
  434. })
  435. if err != nil {
  436. log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
  437. return err
  438. }
  439. //将训练任务的上一版本的isLatestVersion设置为"0"
  440. err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
  441. if err != nil {
  442. ctx.ServerError("Update IsLatestVersion failed", err)
  443. return err
  444. }
  445. return err
  446. }
  447. func TransTrainJobStatus(status int) string {
  448. switch status {
  449. case 0:
  450. return "UNKNOWN"
  451. case 1:
  452. return "INIT"
  453. case 2:
  454. return "IMAGE_CREATING"
  455. case 3:
  456. return "IMAGE_FAILED"
  457. case 4:
  458. return "SUBMIT_TRYING"
  459. case 5:
  460. return "SUBMIT_FAILED"
  461. case 6:
  462. return "DELETE_FAILED"
  463. case 7:
  464. return "WAITING"
  465. case 8:
  466. return "RUNNING"
  467. case 9:
  468. return "KILLING"
  469. case 10:
  470. return "COMPLETED"
  471. case 11:
  472. return "FAILED"
  473. case 12:
  474. return "KILLED"
  475. case 13:
  476. return "CANCELED"
  477. case 14:
  478. return "LOST"
  479. case 15:
  480. return "SCALING"
  481. case 16:
  482. return "SUBMIT_MODEL_FAILED"
  483. case 17:
  484. return "DEPLOY_SERVICE_FAILED"
  485. case 18:
  486. return "CHECK_INIT"
  487. case 19:
  488. return "CHECK_RUNNING"
  489. case 20:
  490. return "CHECK_RUNNING_COMPLETED"
  491. case 21:
  492. return "CHECK_FAILED"
  493. default:
  494. return strconv.Itoa(status)
  495. }
  496. }
  497. func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) {
  498. talVersionCountToString := fmt.Sprintf("%04d", TotalVersionCount)
  499. VersionOutputPath = "V" + talVersionCountToString
  500. return VersionOutputPath
  501. }
  502. func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) {
  503. createTime := timeutil.TimeStampNow()
  504. jobResult, err := createInferenceJob(models.CreateInferenceJobParams{
  505. JobName: req.JobName,
  506. Description: req.Description,
  507. InfConfig: models.InfConfig{
  508. WorkServerNum: req.WorkServerNumber,
  509. AppUrl: req.CodeObsPath,
  510. BootFileUrl: req.BootFileUrl,
  511. DataUrl: req.DataUrl,
  512. EngineID: req.EngineID,
  513. // TrainUrl: req.TrainUrl,
  514. LogUrl: req.LogUrl,
  515. PoolID: req.PoolID,
  516. CreateVersion: true,
  517. Flavor: models.Flavor{
  518. Code: req.FlavorCode,
  519. },
  520. Parameter: req.Parameters,
  521. },
  522. })
  523. if err != nil {
  524. log.Error("CreateJob failed: %v", err.Error())
  525. return err
  526. }
  527. attach, err := models.GetAttachmentByUUID(req.Uuid)
  528. if err != nil {
  529. log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
  530. return err
  531. }
  532. jobID := strconv.FormatInt(jobResult.JobID, 10)
  533. err = models.CreateCloudbrain(&models.Cloudbrain{
  534. Status: TransTrainJobStatus(jobResult.Status),
  535. UserID: ctx.User.ID,
  536. RepoID: ctx.Repo.Repository.ID,
  537. JobID: jobID,
  538. JobName: req.JobName,
  539. DisplayJobName: req.DisplayJobName,
  540. JobType: string(models.JobTypeInference),
  541. Type: models.TypeCloudBrainTwo,
  542. VersionID: jobResult.VersionID,
  543. VersionName: jobResult.VersionName,
  544. Uuid: req.Uuid,
  545. DatasetName: attach.Name,
  546. CommitID: req.CommitID,
  547. EngineID: req.EngineID,
  548. TrainUrl: req.TrainUrl,
  549. BranchName: req.BranchName,
  550. Parameters: req.Params,
  551. BootFile: req.BootFile,
  552. DataUrl: req.DataUrl,
  553. LogUrl: req.LogUrl,
  554. FlavorCode: req.FlavorCode,
  555. Description: req.Description,
  556. WorkServerNumber: req.WorkServerNumber,
  557. FlavorName: req.FlavorName,
  558. EngineName: req.EngineName,
  559. LabelName: req.LabelName,
  560. IsLatestVersion: req.IsLatestVersion,
  561. ComputeResource: models.NPUResource,
  562. VersionCount: req.VersionCount,
  563. TotalVersionCount: req.TotalVersionCount,
  564. ModelName: req.ModelName,
  565. ModelVersion: req.ModelVersion,
  566. CkptName: req.CkptName,
  567. ResultUrl: req.ResultUrl,
  568. CreatedUnix: createTime,
  569. UpdatedUnix: createTime,
  570. })
  571. if err != nil {
  572. log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
  573. return err
  574. }
  575. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateInferenceTask)
  576. return nil
  577. }
  578. func GetNotebookImageName(imageId string) (string, error) {
  579. var validImage = false
  580. var imageName = ""
  581. if ImageInfos == nil {
  582. json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos)
  583. }
  584. for _, imageInfo := range ImageInfos.ImageInfo {
  585. if imageInfo.Id == imageId {
  586. validImage = true
  587. imageName = imageInfo.Value
  588. }
  589. }
  590. if !validImage {
  591. log.Error("the image id(%s) is invalid", imageId)
  592. return imageName, errors.New("the image id is invalid")
  593. }
  594. return imageName, nil
  595. }