You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

notebook.go 12 kB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. package cloudbrainTask
  2. import (
  3. "fmt"
  4. "net/http"
  5. "path"
  6. "code.gitea.io/gitea/modules/modelarts"
  7. "code.gitea.io/gitea/modules/modelarts_cd"
  8. "code.gitea.io/gitea/modules/git"
  9. "code.gitea.io/gitea/modules/cloudbrain"
  10. "code.gitea.io/gitea/modules/log"
  11. "code.gitea.io/gitea/modules/redis/redis_key"
  12. "code.gitea.io/gitea/modules/redis/redis_lock"
  13. "code.gitea.io/gitea/modules/storage"
  14. "code.gitea.io/gitea/services/cloudbrain/resource"
  15. "code.gitea.io/gitea/services/reward/point/account"
  16. "code.gitea.io/gitea/modules/setting"
  17. cloudbrainService "code.gitea.io/gitea/services/cloudbrain"
  18. repo_service "code.gitea.io/gitea/services/repository"
  19. "code.gitea.io/gitea/models"
  20. "code.gitea.io/gitea/modules/context"
  21. api "code.gitea.io/gitea/modules/structs"
  22. "code.gitea.io/gitea/modules/util"
  23. )
// NoteBookExtension is the file extension that FileNotebookCreate requires
// of the requested file; any other extension is rejected.
const NoteBookExtension = ".ipynb"
  25. func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption) {
  26. if ctx.Written() {
  27. return
  28. }
  29. if path.Ext(option.File) != NoteBookExtension {
  30. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_select_wrong")))
  31. return
  32. }
  33. isNotebookFileExist, _ := isNoteBookFileExist(ctx, option)
  34. if !isNotebookFileExist {
  35. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  36. return
  37. }
  38. sourceRepo, err := models.GetRepositoryByOwnerAndName(option.OwnerName, option.ProjectName)
  39. if err != nil {
  40. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  41. return
  42. }
  43. permission, err := models.GetUserRepoPermission(sourceRepo, ctx.User)
  44. if err != nil {
  45. log.Error("Get permission failed", err)
  46. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_no_right")))
  47. return
  48. }
  49. if !permission.CanRead(models.UnitTypeCode) {
  50. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_no_right")))
  51. return
  52. }
  53. //create repo if not exist
  54. repo, err := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName)
  55. if repo == nil {
  56. repo, err = repo_service.CreateRepository(ctx.User, ctx.User, models.CreateRepoOptions{
  57. Name: setting.FileNoteBook.ProjectName,
  58. Alias: "",
  59. Description: "",
  60. IssueLabels: "",
  61. Gitignores: "",
  62. License: "",
  63. Readme: "Default",
  64. IsPrivate: false,
  65. AutoInit: true,
  66. DefaultBranch: "master",
  67. })
  68. }
  69. if err != nil {
  70. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.failed_to_create_notebook_repo",setting.FileNoteBook.ProjectName)))
  71. return
  72. }
  73. if option.Type <= 1 {
  74. cloudBrainFileNoteBookCreate(ctx, option, repo, sourceRepo)
  75. } else {
  76. modelartsFileNoteBookCreate(ctx, option, repo, sourceRepo)
  77. }
  78. }
  79. func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository, sourceRepo *models.Repository) {
  80. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  81. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  82. jobType := string(models.JobTypeDebug)
  83. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName))
  84. defer lock.UnLock()
  85. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  86. if !isOk {
  87. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  88. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  89. return
  90. }
  91. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
  92. if err == nil {
  93. if len(tasks) != 0 {
  94. log.Error("the job name did already exist", ctx.Data["MsgID"])
  95. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  96. return
  97. }
  98. } else {
  99. if !models.IsErrJobNotExist(err) {
  100. log.Error("system error, %v", err, ctx.Data["MsgID"])
  101. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  102. return
  103. }
  104. }
  105. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType)
  106. if err != nil {
  107. log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
  108. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  109. return
  110. } else {
  111. if count >= 1 {
  112. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  113. ctx.JSON(http.StatusOK,models.BaseMessageApi{
  114. Code: 2,
  115. Message: ctx.Tr("repo.cloudbrain.morethanonejob"),
  116. })
  117. return
  118. }
  119. }
  120. errStr := uploadCodeFile(sourceRepo, getCodePath(jobName), option.BranchName, option.File, jobName)
  121. if errStr != "" {
  122. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  123. return
  124. }
  125. command := cloudbrain.GetCloudbrainDebugCommand()
  126. specId := setting.FileNoteBook.SpecIdGPU
  127. if option.Type == 0 {
  128. specId = setting.FileNoteBook.SpecIdCPU
  129. }
  130. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  131. JobType: models.JobType(jobType),
  132. ComputeResource: models.GPU,
  133. Cluster: models.OpenICluster,
  134. AiCenterCode: models.AICenterOfCloudBrainOne})
  135. if err != nil || spec == nil {
  136. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  137. return
  138. }
  139. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  140. log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
  141. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  142. return
  143. }
  144. ctx.Repo = &context.Repository{
  145. Repository: repo,
  146. }
  147. req := cloudbrain.GenerateCloudBrainTaskReq{
  148. Ctx: ctx,
  149. DisplayJobName: displayJobName,
  150. JobName: jobName,
  151. Image: setting.FileNoteBook.ImageGPU,
  152. Command: command,
  153. Uuids: "",
  154. DatasetNames: "",
  155. DatasetInfos: nil,
  156. CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
  157. ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
  158. BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"),
  159. Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
  160. BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
  161. JobType: jobType,
  162. Description: getDescription(option),
  163. BranchName: option.BranchName,
  164. BootFile: option.File,
  165. Params: "{\"parameter\":[]}",
  166. CommitID: "",
  167. BenchmarkTypeID: 0,
  168. BenchmarkChildTypeID: 0,
  169. ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
  170. Spec: spec,
  171. }
  172. jobId, err := cloudbrain.GenerateTask(req)
  173. if err != nil {
  174. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  175. return
  176. }
  177. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  178. Code: 0,
  179. Message: jobId,
  180. })
  181. }
  182. func getCodePath(jobName string) string {
  183. return setting.JobPath + jobName + cloudbrain.CodeMountPath
  184. }
  185. func getDescription(option api.CreateFileNotebookJobOption) string {
  186. return option.OwnerName + "/" + option.ProjectName + "/" + option.File
  187. }
  188. func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository, sourceRepo *models.Repository) {
  189. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  190. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  191. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName))
  192. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  193. if !isOk {
  194. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  195. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  196. return
  197. }
  198. defer lock.UnLock()
  199. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug))
  200. if err != nil {
  201. log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
  202. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  203. return
  204. } else {
  205. if count >= 1 {
  206. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  207. ctx.JSON(http.StatusOK,models.BaseMessageApi{
  208. Code: 2,
  209. Message: ctx.Tr("repo.cloudbrain.morethanonejob"),
  210. })
  211. return
  212. }
  213. }
  214. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName)
  215. if err == nil {
  216. if len(tasks) != 0 {
  217. log.Error("the job name did already exist", ctx.Data["MsgID"])
  218. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  219. return
  220. }
  221. } else {
  222. if !models.IsErrJobNotExist(err) {
  223. log.Error("system error, %v", err, ctx.Data["MsgID"])
  224. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  225. return
  226. }
  227. }
  228. err = downloadCode(sourceRepo, getCodePath(jobName), option.BranchName)
  229. if err != nil {
  230. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
  231. return
  232. }
  233. var aiCenterCode = models.AICenterOfCloudBrainTwo
  234. var specId = setting.FileNoteBook.SpecIdNPU
  235. if setting.ModelartsCD.Enabled {
  236. aiCenterCode = models.AICenterOfChengdu
  237. specId = setting.FileNoteBook.SpecIdNPUCD
  238. }
  239. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  240. JobType: models.JobTypeDebug,
  241. ComputeResource: models.NPU,
  242. Cluster: models.OpenICluster,
  243. AiCenterCode: aiCenterCode})
  244. if err != nil || spec == nil {
  245. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  246. return
  247. }
  248. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  249. log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID)
  250. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  251. return
  252. }
  253. ctx.Repo = &context.Repository{
  254. Repository: repo,
  255. }
  256. var jobId string
  257. if setting.ModelartsCD.Enabled {
  258. jobId, err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPUCD, spec, option.File,modelarts.AutoStopDurationMs/4)
  259. } else {
  260. jobId, err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPU, spec, option.File,modelarts.AutoStopDurationMs/4)
  261. }
  262. if err != nil {
  263. log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
  264. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  265. return
  266. }
  267. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  268. Code: 0,
  269. Message: jobId,
  270. })
  271. }
  272. func isNoteBookFileExist(ctx *context.Context, option api.CreateFileNotebookJobOption) (bool, error) {
  273. repoPathOfNoteBook := models.RepoPath(option.OwnerName, option.ProjectName)
  274. gitRepoOfNoteBook, err := git.OpenRepository(repoPathOfNoteBook)
  275. if err != nil {
  276. log.Error("RepoRef Invalid repo "+repoPathOfNoteBook, err.Error())
  277. return false, err
  278. }
  279. // We opened it, we should close it
  280. defer func() {
  281. // If it's been set to nil then assume someone else has closed it.
  282. if gitRepoOfNoteBook != nil {
  283. gitRepoOfNoteBook.Close()
  284. }
  285. }()
  286. fileExist, err := fileExists(gitRepoOfNoteBook, option.File, option.BranchName)
  287. if err != nil || !fileExist {
  288. log.Error("Get file error:", err, ctx.Data["MsgID"])
  289. return false, err
  290. }
  291. return true, nil
  292. }
  293. func uploadCodeFile(repo *models.Repository, codePath string, branchName string, filePath string, jobName string) string {
  294. err := downloadCode(repo, codePath, branchName)
  295. if err != nil {
  296. return "cloudbrain.load_code_failed"
  297. }
  298. err = uploadOneFileToMinio(codePath, filePath, jobName, cloudbrain.CodeMountPath+"/")
  299. if err != nil {
  300. return "cloudbrain.load_code_failed"
  301. }
  302. return ""
  303. }
  304. func fileExists(gitRepo *git.Repository, path string, branch string) (bool, error) {
  305. commit, err := gitRepo.GetBranchCommit(branch)
  306. if err != nil {
  307. return false, err
  308. }
  309. if _, err := commit.GetTreeEntryByPath(path); err != nil {
  310. return false, err
  311. }
  312. return true, nil
  313. }