You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

modelarts.go 32 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago

  1. package repo
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "io"
  6. "net/http"
  7. "os"
  8. "path"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "code.gitea.io/gitea/models"
  13. "code.gitea.io/gitea/modules/auth"
  14. "code.gitea.io/gitea/modules/base"
  15. "code.gitea.io/gitea/modules/context"
  16. "code.gitea.io/gitea/modules/git"
  17. "code.gitea.io/gitea/modules/log"
  18. "code.gitea.io/gitea/modules/modelarts"
  19. "code.gitea.io/gitea/modules/obs"
  20. "code.gitea.io/gitea/modules/setting"
  21. "code.gitea.io/gitea/modules/storage"
  22. "github.com/unknwon/com"
  23. )
  24. const (
  25. // tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
  26. tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
  27. tplModelArtsNotebookNew base.TplName = "repo/modelarts/notebook/new"
  28. tplModelArtsNotebookShow base.TplName = "repo/modelarts/notebook/show"
  29. tplModelArtsIndex base.TplName = "repo/modelarts/index"
  30. tplModelArtsNew base.TplName = "repo/modelarts/new"
  31. tplModelArtsShow base.TplName = "repo/modelarts/show"
  32. tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index"
  33. tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new"
  34. tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show"
  35. tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index"
  36. )
  37. // MustEnableDataset check if repository enable internal cb
  38. func MustEnableModelArts(ctx *context.Context) {
  39. if !ctx.Repo.CanRead(models.UnitTypeCloudBrain) {
  40. ctx.NotFound("MustEnableCloudbrain", nil)
  41. return
  42. }
  43. }
  44. func ModelArtsIndex(ctx *context.Context) {
  45. MustEnableModelArts(ctx)
  46. repo := ctx.Repo.Repository
  47. page := ctx.QueryInt("page")
  48. if page <= 0 {
  49. page = 1
  50. }
  51. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  52. ListOptions: models.ListOptions{
  53. Page: page,
  54. PageSize: setting.UI.IssuePagingNum,
  55. },
  56. RepoID: repo.ID,
  57. Type: models.TypeCloudBrainTwo,
  58. })
  59. if err != nil {
  60. ctx.ServerError("Cloudbrain", err)
  61. return
  62. }
  63. for i, task := range ciTasks {
  64. if task.Status == string(models.JobRunning) {
  65. ciTasks[i].CanDebug = true
  66. } else {
  67. ciTasks[i].CanDebug = false
  68. }
  69. ciTasks[i].CanDel = models.CanDelJob(ctx.IsSigned, ctx.User, task)
  70. }
  71. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  72. pager.SetDefaultParams(ctx)
  73. ctx.Data["Page"] = pager
  74. ctx.Data["PageIsCloudBrain"] = true
  75. ctx.Data["Tasks"] = ciTasks
  76. ctx.HTML(200, tplModelArtsIndex)
  77. }
  78. func ModelArtsNew(ctx *context.Context) {
  79. ctx.Data["PageIsCloudBrain"] = true
  80. t := time.Now()
  81. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  82. ctx.Data["job_name"] = jobName
  83. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  84. if err != nil {
  85. ctx.ServerError("GetAllUserAttachments failed:", err)
  86. return
  87. }
  88. ctx.Data["attachments"] = attachs
  89. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  90. ctx.Data["env"] = modelarts.NotebookEnv
  91. ctx.Data["notebook_type"] = modelarts.NotebookType
  92. if modelarts.FlavorInfos == nil {
  93. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  94. }
  95. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  96. ctx.HTML(200, tplModelArtsNew)
  97. }
  98. func ModelArtsCreate(ctx *context.Context, form auth.CreateModelArtsForm) {
  99. ctx.Data["PageIsCloudBrain"] = true
  100. jobName := form.JobName
  101. uuid := form.Attachment
  102. description := form.Description
  103. //repo := ctx.Repo.Repository
  104. if !jobNamePattern.MatchString(jobName) {
  105. ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplModelArtsNew, &form)
  106. return
  107. }
  108. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  109. if err != nil {
  110. ctx.RenderWithErr(err.Error(), tplModelArtsNew, &form)
  111. return
  112. }
  113. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  114. }
  115. func ModelArtsShow(ctx *context.Context) {
  116. ctx.Data["PageIsCloudBrain"] = true
  117. var jobID = ctx.Params(":jobid")
  118. task, err := models.GetCloudbrainByJobID(jobID)
  119. if err != nil {
  120. ctx.Data["error"] = err.Error()
  121. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  122. return
  123. }
  124. result, err := modelarts.GetJob(jobID)
  125. if err != nil {
  126. ctx.Data["error"] = err.Error()
  127. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  128. return
  129. }
  130. if result != nil {
  131. task.Status = result.Status
  132. err = models.UpdateJob(task)
  133. if err != nil {
  134. ctx.Data["error"] = err.Error()
  135. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  136. return
  137. }
  138. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  139. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  140. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  141. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  142. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  143. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  144. }
  145. ctx.Data["task"] = task
  146. ctx.Data["jobID"] = jobID
  147. ctx.Data["result"] = result
  148. ctx.HTML(200, tplModelArtsShow)
  149. }
  150. func ModelArtsDebug(ctx *context.Context) {
  151. var jobID = ctx.Params(":jobid")
  152. _, err := models.GetCloudbrainByJobID(jobID)
  153. if err != nil {
  154. ctx.ServerError("GetCloudbrainByJobID failed", err)
  155. return
  156. }
  157. result, err := modelarts.GetJob(jobID)
  158. if err != nil {
  159. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  160. return
  161. }
  162. res, err := modelarts.GetJobToken(jobID)
  163. if err != nil {
  164. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  165. return
  166. }
  167. urls := strings.Split(result.Spec.Annotations.Url, "/")
  168. urlPrefix := result.Spec.Annotations.TargetDomain
  169. for i, url := range urls {
  170. if i > 2 {
  171. urlPrefix += "/" + url
  172. }
  173. }
  174. //urlPrefix := result.Spec.Annotations.TargetDomain + "/modelarts/internal/hub/notebook/user/" + task.JobID
  175. log.Info(urlPrefix)
  176. debugUrl := urlPrefix + "?token=" + res.Token
  177. ctx.Redirect(debugUrl)
  178. }
  179. func ModelArtsStop(ctx *context.Context) {
  180. var jobID = ctx.Params(":jobid")
  181. log.Info(jobID)
  182. task, err := models.GetCloudbrainByJobID(jobID)
  183. if err != nil {
  184. ctx.ServerError("GetCloudbrainByJobID failed", err)
  185. return
  186. }
  187. if task.Status != string(models.JobRunning) {
  188. log.Error("the job(%s) is not running", task.JobName)
  189. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  190. return
  191. }
  192. param := models.NotebookAction{
  193. Action: models.ActionStop,
  194. }
  195. res, err := modelarts.StopJob(jobID, param)
  196. if err != nil {
  197. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  198. ctx.ServerError("StopJob failed", err)
  199. return
  200. }
  201. task.Status = res.CurrentStatus
  202. err = models.UpdateJob(task)
  203. if err != nil {
  204. ctx.ServerError("UpdateJob failed", err)
  205. return
  206. }
  207. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  208. }
  209. func ModelArtsDel(ctx *context.Context) {
  210. var jobID = ctx.Params(":jobid")
  211. task, err := models.GetCloudbrainByJobID(jobID)
  212. if err != nil {
  213. ctx.ServerError("GetCloudbrainByJobID failed", err)
  214. return
  215. }
  216. if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) {
  217. log.Error("the job(%s) has not been stopped", task.JobName)
  218. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  219. return
  220. }
  221. _, err = modelarts.DelJob(jobID)
  222. if err != nil {
  223. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  224. ctx.ServerError("DelJob failed", err)
  225. return
  226. }
  227. err = models.DeleteJob(task)
  228. if err != nil {
  229. ctx.ServerError("DeleteJob failed", err)
  230. return
  231. }
  232. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  233. }
  234. func NotebookIndex(ctx *context.Context) {
  235. MustEnableModelArts(ctx)
  236. repo := ctx.Repo.Repository
  237. page := ctx.QueryInt("page")
  238. if page <= 0 {
  239. page = 1
  240. }
  241. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  242. ListOptions: models.ListOptions{
  243. Page: page,
  244. PageSize: setting.UI.IssuePagingNum,
  245. },
  246. RepoID: repo.ID,
  247. Type: models.TypeCloudBrainNotebook,
  248. })
  249. if err != nil {
  250. ctx.ServerError("Cloudbrain", err)
  251. return
  252. }
  253. for i, task := range ciTasks {
  254. if task.Status == string(models.JobRunning) {
  255. ciTasks[i].CanDebug = true
  256. } else {
  257. ciTasks[i].CanDebug = false
  258. }
  259. }
  260. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  261. pager.SetDefaultParams(ctx)
  262. ctx.Data["Page"] = pager
  263. ctx.Data["PageIsCloudBrain"] = true
  264. ctx.Data["Tasks"] = ciTasks
  265. ctx.HTML(200, tplModelArtsNotebookIndex)
  266. }
  267. func NotebookNew(ctx *context.Context) {
  268. ctx.Data["PageIsCloudBrain"] = true
  269. t := time.Now()
  270. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  271. ctx.Data["job_name"] = jobName
  272. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  273. if err != nil {
  274. ctx.ServerError("GetAllUserAttachments failed:", err)
  275. return
  276. }
  277. ctx.Data["attachments"] = attachs
  278. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  279. ctx.Data["env"] = modelarts.NotebookEnv
  280. ctx.Data["notebook_type"] = modelarts.NotebookType
  281. if modelarts.FlavorInfos == nil {
  282. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  283. }
  284. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  285. ctx.HTML(200, tplModelArtsNotebookNew)
  286. }
  287. func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
  288. ctx.Data["PageIsNotebook"] = true
  289. jobName := form.JobName
  290. uuid := form.Attachment
  291. description := form.Description
  292. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  293. if err != nil {
  294. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
  295. return
  296. }
  297. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  298. }
  299. func NotebookShow(ctx *context.Context) {
  300. ctx.Data["PageIsCloudBrain"] = true
  301. var jobID = ctx.Params(":jobid")
  302. task, err := models.GetCloudbrainByJobID(jobID)
  303. if err != nil {
  304. ctx.Data["error"] = err.Error()
  305. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  306. return
  307. }
  308. result, err := modelarts.GetJob(jobID)
  309. if err != nil {
  310. ctx.Data["error"] = err.Error()
  311. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  312. return
  313. }
  314. if result != nil {
  315. task.Status = result.Status
  316. err = models.UpdateJob(task)
  317. if err != nil {
  318. ctx.Data["error"] = err.Error()
  319. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  320. return
  321. }
  322. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  323. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  324. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  325. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  326. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  327. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  328. }
  329. ctx.Data["task"] = task
  330. ctx.Data["jobID"] = jobID
  331. ctx.Data["result"] = result
  332. ctx.HTML(200, tplModelArtsNotebookShow)
  333. }
  334. func NotebookDebug(ctx *context.Context) {
  335. var jobID = ctx.Params(":jobid")
  336. _, err := models.GetCloudbrainByJobID(jobID)
  337. if err != nil {
  338. ctx.ServerError("GetCloudbrainByJobID failed", err)
  339. return
  340. }
  341. result, err := modelarts.GetJob(jobID)
  342. if err != nil {
  343. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  344. return
  345. }
  346. res, err := modelarts.GetJobToken(jobID)
  347. if err != nil {
  348. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  349. return
  350. }
  351. urls := strings.Split(result.Spec.Annotations.Url, "/")
  352. urlPrefix := result.Spec.Annotations.TargetDomain
  353. for i, url := range urls {
  354. if i > 2 {
  355. urlPrefix += "/" + url
  356. }
  357. }
  358. debugUrl := urlPrefix + "?token=" + res.Token
  359. ctx.Redirect(debugUrl)
  360. }
  361. func NotebookStop(ctx *context.Context) {
  362. var jobID = ctx.Params(":jobid")
  363. log.Info(jobID)
  364. task, err := models.GetCloudbrainByJobID(jobID)
  365. if err != nil {
  366. ctx.ServerError("GetCloudbrainByJobID failed", err)
  367. return
  368. }
  369. if task.Status != string(models.JobRunning) {
  370. log.Error("the job(%s) is not running", task.JobName)
  371. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  372. return
  373. }
  374. param := models.NotebookAction{
  375. Action: models.ActionStop,
  376. }
  377. res, err := modelarts.StopJob(jobID, param)
  378. if err != nil {
  379. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  380. ctx.ServerError("StopJob failed", err)
  381. return
  382. }
  383. task.Status = res.CurrentStatus
  384. err = models.UpdateJob(task)
  385. if err != nil {
  386. ctx.ServerError("UpdateJob failed", err)
  387. return
  388. }
  389. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  390. }
  391. func NotebookDel(ctx *context.Context) {
  392. var jobID = ctx.Params(":jobid")
  393. task, err := models.GetCloudbrainByJobID(jobID)
  394. if err != nil {
  395. ctx.ServerError("GetCloudbrainByJobID failed", err)
  396. return
  397. }
  398. if task.Status != string(models.JobStopped) {
  399. log.Error("the job(%s) has not been stopped", task.JobName)
  400. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  401. return
  402. }
  403. _, err = modelarts.DelNotebook(jobID)
  404. if err != nil {
  405. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  406. ctx.ServerError("DelJob failed", err)
  407. return
  408. }
  409. err = models.DeleteJob(task)
  410. if err != nil {
  411. ctx.ServerError("DeleteJob failed", err)
  412. return
  413. }
  414. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  415. }
  416. func TrainJobIndex(ctx *context.Context) {
  417. MustEnableModelArts(ctx)
  418. //can, err := canUserCreateTrainJob(ctx.User.ID)
  419. //if err != nil {
  420. // ctx.ServerError("canUserCreateTrainJob", err)
  421. // return
  422. //}
  423. //
  424. //ctx.Data["CanCreate"] = can
  425. repo := ctx.Repo.Repository
  426. page := ctx.QueryInt("page")
  427. if page <= 0 {
  428. page = 1
  429. }
  430. tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  431. ListOptions: models.ListOptions{
  432. Page: page,
  433. PageSize: setting.UI.IssuePagingNum,
  434. },
  435. RepoID: repo.ID,
  436. Type: models.TypeCloudBrainTrainJob,
  437. })
  438. if err != nil {
  439. ctx.ServerError("Cloudbrain", err)
  440. return
  441. }
  442. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  443. pager.SetDefaultParams(ctx)
  444. ctx.Data["Page"] = pager
  445. ctx.Data["PageIsCloudBrain"] = true
  446. ctx.Data["Tasks"] = tasks
  447. ctx.HTML(200, tplModelArtsTrainJobIndex)
  448. }
  449. func TrainJobNew(ctx *context.Context) {
  450. ctx.Data["PageIsCloudBrain"] = true
  451. //can, err := canUserCreateTrainJob(ctx.User.ID)
  452. //if err != nil {
  453. // ctx.ServerError("canUserCreateTrainJob", err)
  454. // return
  455. //}
  456. //
  457. //if !can {
  458. // log.Error("the user can not create train-job")
  459. // ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job"))
  460. // return
  461. //}
  462. t := time.Now()
  463. var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  464. ctx.Data["job_name"] = jobName
  465. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  466. if err != nil {
  467. ctx.ServerError("GetAllUserAttachments failed:", err)
  468. return
  469. }
  470. ctx.Data["attachments"] = attachs
  471. var resourcePools modelarts.ResourcePool
  472. if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
  473. ctx.ServerError("json.Unmarshal failed:", err)
  474. return
  475. }
  476. ctx.Data["resource_pools"] = resourcePools.Info
  477. var engines modelarts.Engine
  478. if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
  479. ctx.ServerError("json.Unmarshal failed:", err)
  480. return
  481. }
  482. ctx.Data["engines"] = engines.Info
  483. var versionInfos modelarts.VersionInfo
  484. if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
  485. ctx.ServerError("json.Unmarshal failed:", err)
  486. return
  487. }
  488. ctx.Data["engine_versions"] = versionInfos.Version
  489. var flavorInfos modelarts.Flavor
  490. if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
  491. ctx.ServerError("json.Unmarshal failed:", err)
  492. return
  493. }
  494. ctx.Data["flavor_infos"] = flavorInfos.Info
  495. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  496. ctx.Data["train_url"] = outputObsPath
  497. configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
  498. if err != nil {
  499. ctx.ServerError("getConfigList failed:", err)
  500. return
  501. }
  502. ctx.Data["config_list"] = configList.ParaConfigs
  503. ctx.HTML(200, tplModelArtsTrainJobNew)
  504. }
  505. func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
  506. ctx.Data["PageIsTrainJob"] = true
  507. jobName := form.JobName
  508. uuid := form.Attachment
  509. description := form.Description
  510. workServerNumber := form.WorkServerNumber
  511. engineID := form.EngineID
  512. bootFile := form.BootFile
  513. flavorCode := form.Flavor
  514. params := form.Params
  515. poolID := form.PoolID
  516. isSaveParam := form.IsSaveParam
  517. repo := ctx.Repo.Repository
  518. codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
  519. codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
  520. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  521. logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
  522. dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  523. //can, err := canUserCreateTrainJob(ctx.User.ID)
  524. //if err != nil {
  525. // ctx.ServerError("canUserCreateTrainJob", err)
  526. // return
  527. //}
  528. //
  529. //if !can {
  530. // log.Error("the user can not create train-job")
  531. // ctx.RenderWithErr("the user can not create train-job", tplModelArtsTrainJobNew, &form)
  532. // return
  533. //}
  534. //param check
  535. if err := paramCheckCreateTrainJob(form); err != nil {
  536. log.Error("paramCheckCreateTrainJob failed:(%v)", err)
  537. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  538. return
  539. }
  540. if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
  541. log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err)
  542. ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
  543. return
  544. }
  545. //todo: upload code (send to file_server todo this work?)
  546. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
  547. log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
  548. ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
  549. return
  550. }
  551. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
  552. log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
  553. ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
  554. return
  555. }
  556. if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
  557. log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
  558. ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
  559. return
  560. }
  561. //todo: del local code?
  562. var parameters models.Parameters
  563. param := make([]models.Parameter, 0)
  564. param = append(param, models.Parameter{
  565. Label: modelarts.TrainUrl,
  566. Value: outputObsPath,
  567. }, models.Parameter{
  568. Label: modelarts.DataUrl,
  569. Value: dataPath,
  570. })
  571. if len(params) != 0 {
  572. err := json.Unmarshal([]byte(params), &parameters)
  573. if err != nil {
  574. log.Error("Failed to Unmarshal params: %s (%v)", params, err)
  575. ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form)
  576. return
  577. }
  578. for _, parameter := range parameters.Parameter {
  579. if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
  580. param = append(param, models.Parameter{
  581. Label: parameter.Label,
  582. Value: parameter.Value,
  583. })
  584. }
  585. }
  586. }
  587. //save param config
  588. if isSaveParam == "on" {
  589. if form.ParameterTemplateName == "" {
  590. log.Error("ParameterTemplateName is empty")
  591. ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form)
  592. return
  593. }
  594. _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
  595. ConfigName: form.ParameterTemplateName,
  596. Description: form.PrameterDescription,
  597. DataUrl: dataPath,
  598. AppUrl: codeObsPath,
  599. BootFileUrl: codeObsPath + bootFile,
  600. TrainUrl: outputObsPath,
  601. Flavor: models.Flavor{
  602. Code: flavorCode,
  603. },
  604. WorkServerNum: workServerNumber,
  605. EngineID: int64(engineID),
  606. LogUrl: logObsPath,
  607. PoolID: poolID,
  608. Parameter: param,
  609. })
  610. if err != nil {
  611. log.Error("Failed to CreateTrainJobConfig: %v", err)
  612. ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form)
  613. return
  614. }
  615. }
  616. req := &modelarts.GenerateTrainJobReq{
  617. JobName: jobName,
  618. DataUrl: dataPath,
  619. Description: description,
  620. CodeObsPath: codeObsPath,
  621. BootFile: codeObsPath + bootFile,
  622. TrainUrl: outputObsPath,
  623. FlavorCode: flavorCode,
  624. WorkServerNumber: workServerNumber,
  625. EngineID: int64(engineID),
  626. LogUrl: logObsPath,
  627. PoolID: poolID,
  628. Uuid: uuid,
  629. Parameters: param,
  630. }
  631. err := modelarts.GenerateTrainJob(ctx, req)
  632. if err != nil {
  633. log.Error("GenerateTrainJob failed:%v", err.Error())
  634. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  635. return
  636. }
  637. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  638. }
  639. // readDir reads the directory named by dirname and returns
  640. // a list of directory entries sorted by filename.
  641. func readDir(dirname string) ([]os.FileInfo, error) {
  642. f, err := os.Open(dirname)
  643. if err != nil {
  644. return nil, err
  645. }
  646. list, err := f.Readdir(100)
  647. f.Close()
  648. if err != nil {
  649. //todo: can not upload empty folder
  650. if err == io.EOF {
  651. return nil, nil
  652. }
  653. return nil, err
  654. }
  655. //sort.Slice(list, func(i, j int) bool { return list[i].Name() < list[j].Name() })
  656. return list, nil
  657. }
  658. func uploadCodeToObs(codePath, jobName, parentDir string) error {
  659. files, err := readDir(codePath)
  660. if err != nil {
  661. log.Error("readDir(%s) failed: %s", codePath, err.Error())
  662. return err
  663. }
  664. for _, file := range files {
  665. if file.IsDir() {
  666. input := &obs.PutObjectInput{}
  667. input.Bucket = setting.Bucket
  668. input.Key = parentDir + file.Name() + "/"
  669. _, err = storage.ObsCli.PutObject(input)
  670. if err != nil {
  671. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  672. return err
  673. }
  674. if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil {
  675. log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error())
  676. return err
  677. }
  678. } else {
  679. input := &obs.PutFileInput{}
  680. input.Bucket = setting.Bucket
  681. input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name()
  682. input.SourceFile = codePath + file.Name()
  683. _, err = storage.ObsCli.PutFile(input)
  684. if err != nil {
  685. log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error())
  686. return err
  687. }
  688. }
  689. }
  690. return nil
  691. }
  692. func obsMkdir(dir string) error {
  693. input := &obs.PutObjectInput{}
  694. input.Bucket = setting.Bucket
  695. input.Key = dir
  696. _, err := storage.ObsCli.PutObject(input)
  697. if err != nil {
  698. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  699. return err
  700. }
  701. return nil
  702. }
  703. func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
  704. if !strings.HasSuffix(form.BootFile, ".py") {
  705. log.Error("the boot file(%s) must be a python file", form.BootFile)
  706. return errors.New("启动文件必须是python文件")
  707. }
  708. if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 {
  709. log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber)
  710. return errors.New("计算节点数必须在1-25之间")
  711. }
  712. return nil
  713. }
  714. func TrainJobShow(ctx *context.Context) {
  715. ctx.Data["PageIsCloudBrain"] = true
  716. var jobID = ctx.Params(":jobid")
  717. task, err := models.GetCloudbrainByJobID(jobID)
  718. if err != nil {
  719. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  720. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  721. return
  722. }
  723. attach, err := models.GetAttachmentByUUID(task.Uuid)
  724. if err != nil {
  725. log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error())
  726. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  727. return
  728. }
  729. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  730. if err != nil {
  731. log.Error("GetJob(%s) failed:%v", jobID, err.Error())
  732. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  733. return
  734. }
  735. if result != nil {
  736. result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05")
  737. if result.Duration != 0 {
  738. result.TrainJobDuration = addZero(result.Duration/3600000) + ":" + addZero(result.Duration%3600000/60000) + ":" + addZero(result.Duration%60000/1000)
  739. } else {
  740. result.TrainJobDuration = "00:00:00"
  741. }
  742. result.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  743. err = models.SetTrainJobStatusByJobID(jobID, result.Status, result.Duration, string(result.TrainJobDuration))
  744. if err != nil {
  745. ctx.ServerError("UpdateJob failed", err)
  746. return
  747. }
  748. result.DatasetName = attach.Name
  749. }
  750. resultLogFile, resultLog, err := trainJobGetLog(jobID)
  751. if err != nil {
  752. log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
  753. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  754. return
  755. }
  756. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  757. ctx.Data["log"] = resultLog
  758. ctx.Data["task"] = task
  759. ctx.Data["jobID"] = jobID
  760. ctx.Data["result"] = result
  761. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  762. }
  // addZero renders t in base 10 and prefixes a single "0" whenever t is less
  // than 10, so that single-digit values come out two characters wide.
  // Values of 10 and above are returned without padding.
  func addZero(t int64) (m string) {
  	m = strconv.FormatInt(t, 10)
  	if t < 10 {
  		m = "0" + m
  	}
  	return m
  }
  771. func TrainJobGetLog(ctx *context.Context) {
  772. ctx.Data["PageIsTrainJob"] = true
  773. var jobID = ctx.Params(":jobid")
  774. var logFileName = ctx.Query("file_name")
  775. var baseLine = ctx.Query("base_line")
  776. var order = ctx.Query("order")
  777. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  778. log.Error("order(%s) check failed", order)
  779. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  780. return
  781. }
  782. task, err := models.GetCloudbrainByJobID(jobID)
  783. if err != nil {
  784. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  785. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  786. return
  787. }
  788. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  789. if err != nil {
  790. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  791. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  792. return
  793. }
  794. ctx.Data["log"] = result
  795. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  796. }
  797. func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
  798. task, err := models.GetCloudbrainByJobID(jobID)
  799. if err != nil {
  800. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  801. return nil, nil, err
  802. }
  803. resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
  804. if err != nil {
  805. log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
  806. return nil, nil, err
  807. }
  808. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
  809. if err != nil {
  810. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  811. return nil, nil, err
  812. }
  813. return resultLogFile, result, err
  814. }
  815. func TrainJobDel(ctx *context.Context) {
  816. var jobID = ctx.Params(":jobid")
  817. task, err := models.GetCloudbrainByJobID(jobID)
  818. if err != nil {
  819. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  820. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  821. return
  822. }
  823. _, err = modelarts.DelTrainJob(jobID)
  824. if err != nil {
  825. log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
  826. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  827. return
  828. }
  829. err = models.DeleteJob(task)
  830. if err != nil {
  831. ctx.ServerError("DeleteJob failed", err)
  832. return
  833. }
  834. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  835. }
  836. func TrainJobStop(ctx *context.Context) {
  837. var jobID = ctx.Params(":jobid")
  838. task, err := models.GetCloudbrainByJobID(jobID)
  839. if err != nil {
  840. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  841. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  842. return
  843. }
  844. _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  845. if err != nil {
  846. log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
  847. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  848. return
  849. }
  850. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  851. }
  852. func canUserCreateTrainJob(uid int64) (bool, error) {
  853. org, err := models.GetOrgByName(setting.AllowedOrg)
  854. if err != nil {
  855. log.Error("get allowed org failed: ", setting.AllowedOrg)
  856. return false, err
  857. }
  858. return org.IsOrgMember(uid)
  859. }
  860. func TrainJobGetConfigList(ctx *context.Context) {
  861. ctx.Data["PageIsTrainJob"] = true
  862. var jobID = ctx.Params(":jobid")
  863. var logFileName = ctx.Query("file_name")
  864. var baseLine = ctx.Query("base_line")
  865. var order = ctx.Query("order")
  866. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  867. log.Error("order(%s) check failed", order)
  868. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  869. return
  870. }
  871. task, err := models.GetCloudbrainByJobID(jobID)
  872. if err != nil {
  873. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  874. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  875. return
  876. }
  877. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  878. if err != nil {
  879. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  880. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  881. return
  882. }
  883. ctx.Data["log"] = result
  884. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  885. }
  886. func getConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
  887. var result models.GetConfigListResult
  888. list, err := modelarts.GetConfigList(perPage, page, sortBy, order, searchContent, configType)
  889. if err != nil {
  890. log.Error("GetConfigList failed:", err)
  891. return &result, err
  892. }
  893. for _, config := range list.ParaConfigs {
  894. paraConfig, err := modelarts.GetParaConfig(config.ConfigName, configType)
  895. if err != nil {
  896. log.Error("GetParaConfig failed:", err)
  897. return &result, err
  898. }
  899. config.Result = paraConfig
  900. }
  901. return list, nil
  902. }
  903. func TrainJobShowModels(ctx *context.Context) {
  904. ctx.Data["PageIsCloudBrain"] = true
  905. jobID := ctx.Params(":jobid")
  906. parentDir := ctx.Query("parentDir")
  907. dirArray := strings.Split(parentDir, "/")
  908. task, err := models.GetCloudbrainByJobID(jobID)
  909. if err != nil {
  910. log.Error("no such job!", ctx.Data["msgID"])
  911. ctx.ServerError("no such job:", err)
  912. return
  913. }
  914. models, err := storage.GetObsListObject(task.JobName, parentDir)
  915. if err != nil {
  916. log.Info("get TrainJobListModel failed:", err)
  917. ctx.ServerError("GetObsListObject:", err)
  918. return
  919. }
  920. ctx.Data["Path"] = dirArray
  921. ctx.Data["Dirs"] = models
  922. ctx.Data["task"] = task
  923. ctx.Data["JobID"] = jobID
  924. ctx.HTML(200, tplModelArtsTrainJobShowModels)
  925. }
  926. func TrainJobDownloadModel(ctx *context.Context) {
  927. parentDir := ctx.Query("parentDir")
  928. fileName := ctx.Query("fileName")
  929. jobName := ctx.Query("jobName")
  930. url, err := storage.GetObsCreateSignedUrl(jobName, parentDir, fileName)
  931. if err != nil {
  932. log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
  933. ctx.ServerError("GetObsCreateSignedUrl", err)
  934. return
  935. }
  936. http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently)
  937. }