You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

modelarts.go 20 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
  1. package repo
  2. import (
  3. "code.gitea.io/gitea/modules/git"
  4. "code.gitea.io/gitea/modules/modelarts"
  5. "code.gitea.io/gitea/modules/obs"
  6. "code.gitea.io/gitea/modules/storage"
  7. "encoding/json"
  8. "errors"
  9. "github.com/unknwon/com"
  10. "io"
  11. "net/http"
  12. "os"
  13. "path"
  14. "strconv"
  15. "strings"
  16. "time"
  17. "code.gitea.io/gitea/models"
  18. "code.gitea.io/gitea/modules/auth"
  19. "code.gitea.io/gitea/modules/base"
  20. "code.gitea.io/gitea/modules/context"
  21. "code.gitea.io/gitea/modules/log"
  22. "code.gitea.io/gitea/modules/setting"
  23. )
  24. const (
  25. tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
  26. tplModelArtsNotebookNew base.TplName = "repo/modelarts/notebook/new"
  27. tplModelArtsNotebookShow base.TplName = "repo/modelarts/notebook/show"
  28. tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index"
  29. tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new"
  30. tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show"
  31. )
  32. // MustEnableDataset check if repository enable internal cb
  33. func MustEnableModelArts(ctx *context.Context) {
  34. if !ctx.Repo.CanRead(models.UnitTypeCloudBrain) {
  35. ctx.NotFound("MustEnableCloudbrain", nil)
  36. return
  37. }
  38. }
  39. func NotebookIndex(ctx *context.Context) {
  40. MustEnableModelArts(ctx)
  41. repo := ctx.Repo.Repository
  42. page := ctx.QueryInt("page")
  43. if page <= 0 {
  44. page = 1
  45. }
  46. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  47. ListOptions: models.ListOptions{
  48. Page: page,
  49. PageSize: setting.UI.IssuePagingNum,
  50. },
  51. RepoID: repo.ID,
  52. Type: models.TypeCloudBrainNotebook,
  53. })
  54. if err != nil {
  55. ctx.ServerError("Cloudbrain", err)
  56. return
  57. }
  58. for i, task := range ciTasks {
  59. if task.Status == string(models.JobRunning) {
  60. ciTasks[i].CanDebug = true
  61. } else {
  62. ciTasks[i].CanDebug = false
  63. }
  64. }
  65. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  66. pager.SetDefaultParams(ctx)
  67. ctx.Data["Page"] = pager
  68. ctx.Data["PageIsNotebook"] = true
  69. ctx.Data["Tasks"] = ciTasks
  70. ctx.HTML(200, tplModelArtsNotebookIndex)
  71. }
  72. func NotebookNew(ctx *context.Context) {
  73. ctx.Data["PageIsNotebook"] = true
  74. t := time.Now()
  75. var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  76. ctx.Data["job_name"] = jobName
  77. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  78. if err != nil {
  79. ctx.ServerError("GetAllUserAttachments failed:", err)
  80. return
  81. }
  82. ctx.Data["attachments"] = attachs
  83. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  84. ctx.Data["env"] = modelarts.NotebookEnv
  85. ctx.Data["notebook_type"] = modelarts.NotebookType
  86. ctx.Data["flavor"] = modelarts.FlavorInfo
  87. ctx.HTML(200, tplModelArtsNotebookNew)
  88. }
  89. func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
  90. ctx.Data["PageIsNotebook"] = true
  91. jobName := form.JobName
  92. uuid := form.Attachment
  93. description := form.Description
  94. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  95. if err != nil {
  96. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
  97. return
  98. }
  99. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  100. }
  101. func NotebookShow(ctx *context.Context) {
  102. ctx.Data["PageIsNotebook"] = true
  103. var jobID = ctx.Params(":jobid")
  104. task, err := models.GetCloudbrainByJobID(jobID)
  105. if err != nil {
  106. ctx.Data["error"] = err.Error()
  107. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  108. return
  109. }
  110. result, err := modelarts.GetJob(jobID)
  111. if err != nil {
  112. ctx.Data["error"] = err.Error()
  113. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  114. return
  115. }
  116. if result != nil {
  117. task.Status = result.Status
  118. err = models.UpdateJob(task)
  119. if err != nil {
  120. ctx.Data["error"] = err.Error()
  121. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  122. return
  123. }
  124. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  125. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  126. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  127. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  128. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  129. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  130. }
  131. ctx.Data["task"] = task
  132. ctx.Data["jobID"] = jobID
  133. ctx.Data["result"] = result
  134. ctx.HTML(200, tplModelArtsNotebookShow)
  135. }
  136. func NotebookDebug(ctx *context.Context) {
  137. var jobID = ctx.Params(":jobid")
  138. _, err := models.GetCloudbrainByJobID(jobID)
  139. if err != nil {
  140. ctx.ServerError("GetCloudbrainByJobID failed", err)
  141. return
  142. }
  143. result, err := modelarts.GetJob(jobID)
  144. if err != nil {
  145. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  146. return
  147. }
  148. res, err := modelarts.GetJobToken(jobID)
  149. if err != nil {
  150. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  151. return
  152. }
  153. urls := strings.Split(result.Spec.Annotations.Url, "/")
  154. urlPrefix := result.Spec.Annotations.TargetDomain
  155. for i, url := range urls {
  156. if i > 2 {
  157. urlPrefix += "/" + url
  158. }
  159. }
  160. debugUrl := urlPrefix + "?token=" + res.Token
  161. ctx.Redirect(debugUrl)
  162. }
  163. func NotebookStop(ctx *context.Context) {
  164. var jobID = ctx.Params(":jobid")
  165. log.Info(jobID)
  166. task, err := models.GetCloudbrainByJobID(jobID)
  167. if err != nil {
  168. ctx.ServerError("GetCloudbrainByJobID failed", err)
  169. return
  170. }
  171. if task.Status != string(models.JobRunning) {
  172. log.Error("the job(%s) is not running", task.JobName)
  173. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  174. return
  175. }
  176. param := models.NotebookAction{
  177. Action: models.ActionStop,
  178. }
  179. res, err := modelarts.StopJob(jobID, param)
  180. if err != nil {
  181. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  182. ctx.ServerError("StopJob failed", err)
  183. return
  184. }
  185. task.Status = res.CurrentStatus
  186. err = models.UpdateJob(task)
  187. if err != nil {
  188. ctx.ServerError("UpdateJob failed", err)
  189. return
  190. }
  191. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  192. }
  193. func NotebookDel(ctx *context.Context) {
  194. var jobID = ctx.Params(":jobid")
  195. task, err := models.GetCloudbrainByJobID(jobID)
  196. if err != nil {
  197. ctx.ServerError("GetCloudbrainByJobID failed", err)
  198. return
  199. }
  200. if task.Status != string(models.JobStopped) {
  201. log.Error("the job(%s) has not been stopped", task.JobName)
  202. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  203. return
  204. }
  205. _, err = modelarts.DelNotebook(jobID)
  206. if err != nil {
  207. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  208. ctx.ServerError("DelJob failed", err)
  209. return
  210. }
  211. err = models.DeleteJob(task)
  212. if err != nil {
  213. ctx.ServerError("DeleteJob failed", err)
  214. return
  215. }
  216. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  217. }
  218. func TrainJobIndex(ctx *context.Context) {
  219. MustEnableModelArts(ctx)
  220. repo := ctx.Repo.Repository
  221. page := ctx.QueryInt("page")
  222. if page <= 0 {
  223. page = 1
  224. }
  225. tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  226. ListOptions: models.ListOptions{
  227. Page: page,
  228. PageSize: setting.UI.IssuePagingNum,
  229. },
  230. RepoID: repo.ID,
  231. Type: models.TypeCloudBrainTrainJob,
  232. })
  233. if err != nil {
  234. ctx.ServerError("Cloudbrain", err)
  235. return
  236. }
  237. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  238. pager.SetDefaultParams(ctx)
  239. ctx.Data["Page"] = pager
  240. ctx.Data["PageIsTrainJob"] = true
  241. ctx.Data["Tasks"] = tasks
  242. ctx.HTML(200, tplModelArtsTrainJobIndex)
  243. }
  244. func TrainJobNew(ctx *context.Context) {
  245. ctx.Data["PageIsTrainJob"] = true
  246. t := time.Now()
  247. var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  248. ctx.Data["job_name"] = jobName
  249. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  250. if err != nil {
  251. ctx.ServerError("GetAllUserAttachments failed:", err)
  252. return
  253. }
  254. ctx.Data["attachments"] = attachs
  255. var resourcePools modelarts.ResourcePool
  256. if err = json.Unmarshal([]byte(modelarts.ResourcePools), &resourcePools); err != nil {
  257. ctx.ServerError("json.Unmarshal failed:", err)
  258. return
  259. }
  260. ctx.Data["resource_pools"] = resourcePools.Info
  261. var engines modelarts.Engine
  262. if err = json.Unmarshal([]byte(modelarts.Engines), &engines); err != nil {
  263. ctx.ServerError("json.Unmarshal failed:", err)
  264. return
  265. }
  266. ctx.Data["engines"] = engines.Info
  267. var versionInfos modelarts.VersionInfo
  268. if err = json.Unmarshal([]byte(modelarts.EngineVersions), &versionInfos); err != nil {
  269. ctx.ServerError("json.Unmarshal failed:", err)
  270. return
  271. }
  272. ctx.Data["engine_versions"] = versionInfos.Version
  273. var flavorInfos modelarts.Flavor
  274. if err = json.Unmarshal([]byte(modelarts.FlavorInfos), &flavorInfos); err != nil {
  275. ctx.ServerError("json.Unmarshal failed:", err)
  276. return
  277. }
  278. ctx.Data["flavor_infos"] = flavorInfos.Info
  279. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  280. ctx.Data["train_url"] = outputObsPath
  281. ctx.HTML(200, tplModelArtsTrainJobNew)
  282. }
  283. func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
  284. ctx.Data["PageIsTrainJob"] = true
  285. jobName := form.JobName
  286. uuid := form.Attachment
  287. description := form.Description
  288. workServerNumber := form.WorkServerNumber
  289. engineID := form.EngineID
  290. bootFile := form.BootFile
  291. flavorCode := form.Flavor
  292. params := form.Params
  293. poolID := form.PoolID
  294. isSaveParam := form.IsSaveParam
  295. repo := ctx.Repo.Repository
  296. codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
  297. codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
  298. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  299. logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
  300. dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  301. //param check
  302. if err := paramCheckCreateTrainJob(form); err != nil {
  303. log.Error("paramCheckCreateTrainJob failed:(%v)", err)
  304. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  305. return
  306. }
  307. if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
  308. log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err)
  309. ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
  310. return
  311. }
  312. //todo: upload code (send to file_server todo this work?)
  313. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
  314. log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
  315. ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
  316. return
  317. }
  318. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
  319. log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
  320. ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
  321. return
  322. }
  323. if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
  324. log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
  325. ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
  326. return
  327. }
  328. //todo: del local code?
  329. var parameters models.Parameters
  330. param := make([]models.Parameter, 0)
  331. param = append(param, models.Parameter{
  332. Label: modelarts.TrainUrl,
  333. Value: outputObsPath,
  334. }, models.Parameter{
  335. Label: modelarts.DataUrl,
  336. Value: dataPath,
  337. })
  338. if len(params) != 0 {
  339. err := json.Unmarshal([]byte(params), &parameters)
  340. if err != nil {
  341. log.Error("Failed to Unmarshal params: %s (%v)", params, err)
  342. ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form)
  343. return
  344. }
  345. for _, parameter := range parameters.Parameter {
  346. if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
  347. param = append(param, models.Parameter{
  348. Label: parameter.Label,
  349. Value: parameter.Value,
  350. })
  351. }
  352. }
  353. }
  354. if isSaveParam == "on" {
  355. if form.ParameterTemplateName == "" {
  356. log.Error("ParameterTemplateName is empty")
  357. ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form)
  358. return
  359. }
  360. _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
  361. ConfigName: form.ParameterTemplateName,
  362. Description: form.PrameterDescription,
  363. DataUrl: dataPath,
  364. AppUrl: codeObsPath,
  365. BootFileUrl: codeObsPath + bootFile,
  366. TrainUrl: outputObsPath,
  367. Flavor: models.Flavor{
  368. Code: flavorCode,
  369. },
  370. WorkServerNum: workServerNumber,
  371. EngineID: int64(engineID),
  372. LogUrl: logObsPath,
  373. PoolID: poolID,
  374. Parameter: param,
  375. })
  376. if err != nil {
  377. log.Error("Failed to CreateTrainJobConfig: %v", err)
  378. ctx.RenderWithErr("保存作业参数失败:" + err.Error(), tplModelArtsTrainJobNew, &form)
  379. return
  380. }
  381. }
  382. req := &modelarts.GenerateTrainJobReq{
  383. JobName: jobName,
  384. DataUrl: dataPath,
  385. Description: description,
  386. CodeObsPath: codeObsPath,
  387. BootFile: codeObsPath + bootFile,
  388. TrainUrl: outputObsPath,
  389. FlavorCode: flavorCode,
  390. WorkServerNumber: workServerNumber,
  391. EngineID: int64(engineID),
  392. LogUrl: logObsPath,
  393. PoolID: poolID,
  394. Uuid: uuid,
  395. Parameters: param,
  396. }
  397. err := modelarts.GenerateTrainJob(ctx, req)
  398. if err != nil {
  399. log.Error("GenerateTrainJob failed:%v", err.Error())
  400. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  401. return
  402. }
  403. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  404. }
  // readDir reads the directory named by dirname and returns all of its
  // entries in the order the operating system yields them (the list is NOT
  // sorted).
  //
  // The directory is read in batches of 100 entries until io.EOF, so large
  // directories are returned completely. An empty directory yields a nil
  // slice and a nil error. On any other error no entries are returned.
  func readDir(dirname string) ([]os.FileInfo, error) {
  	f, err := os.Open(dirname)
  	if err != nil {
  		return nil, err
  	}
  	defer f.Close()

  	var list []os.FileInfo
  	for {
  		batch, err := f.Readdir(100)
  		list = append(list, batch...)
  		if err == io.EOF {
  			//todo: can not upload empty folder
  			return list, nil
  		}
  		if err != nil {
  			return nil, err
  		}
  	}
  }
  424. func uploadCodeToObs(codePath, jobName, parentDir string) error {
  425. files, err := readDir(codePath)
  426. if err != nil {
  427. log.Error("readDir(%s) failed: %s", codePath, err.Error())
  428. return err
  429. }
  430. for _, file := range files {
  431. if file.IsDir() {
  432. input := &obs.PutObjectInput{}
  433. input.Bucket = setting.Bucket
  434. input.Key = parentDir + file.Name() + "/"
  435. _, err = storage.ObsCli.PutObject(input)
  436. if err != nil {
  437. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  438. return err
  439. }
  440. if err = uploadCodeToObs(codePath + file.Name() + "/", jobName, parentDir + file.Name() + "/"); err != nil {
  441. log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error())
  442. return err
  443. }
  444. } else {
  445. input := &obs.PutFileInput{}
  446. input.Bucket = setting.Bucket
  447. input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name()
  448. input.SourceFile = codePath + file.Name()
  449. _, err = storage.ObsCli.PutFile(input)
  450. if err != nil {
  451. log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error())
  452. return err
  453. }
  454. }
  455. }
  456. return nil
  457. }
  458. func obsMkdir(dir string) error {
  459. input := &obs.PutObjectInput{}
  460. input.Bucket = setting.Bucket
  461. input.Key = dir
  462. _, err := storage.ObsCli.PutObject(input)
  463. if err != nil {
  464. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  465. return err
  466. }
  467. return nil
  468. }
  469. func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
  470. if !strings.HasSuffix(form.BootFile, ".py") {
  471. log.Error("the boot file(%s) must be a python file", form.BootFile)
  472. return errors.New("启动文件必须是python文件")
  473. }
  474. if form.WorkServerNumber > 25 || form.WorkServerNumber < 1{
  475. log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber)
  476. return errors.New("计算节点数必须在1-25之间")
  477. }
  478. return nil
  479. }
  480. func TrainJobShow(ctx *context.Context) {
  481. ctx.Data["PageIsTrainJob"] = true
  482. var jobID = ctx.Params(":jobid")
  483. task, err := models.GetCloudbrainByJobID(jobID)
  484. if err != nil {
  485. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  486. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  487. return
  488. }
  489. attach, err := models.GetAttachmentByUUID(task.Uuid)
  490. if err != nil {
  491. log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error())
  492. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  493. return
  494. }
  495. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  496. if err != nil {
  497. log.Error("GetJob(%s) failed:%v", jobID, err.Error())
  498. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  499. return
  500. }
  501. if result != nil {
  502. result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05")
  503. result.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  504. result.DatasetName = attach.Name
  505. }
  506. resultLogFile, resultLog, err := trainJobGetLog(jobID)
  507. if err != nil {
  508. log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
  509. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  510. return
  511. }
  512. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  513. ctx.Data["log"] = resultLog
  514. ctx.Data["task"] = task
  515. ctx.Data["jobID"] = jobID
  516. ctx.Data["result"] = result
  517. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  518. }
  519. func TrainJobGetLog(ctx *context.Context) {
  520. ctx.Data["PageIsTrainJob"] = true
  521. var jobID = ctx.Params(":jobid")
  522. var logFileName = ctx.Query("file_name")
  523. var baseLine = ctx.Query("base_line")
  524. var order = ctx.Query("order")
  525. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  526. log.Error("order(%s) check failed", order)
  527. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  528. return
  529. }
  530. task, err := models.GetCloudbrainByJobID(jobID)
  531. if err != nil {
  532. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  533. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  534. return
  535. }
  536. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, 20)
  537. if err != nil {
  538. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  539. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  540. return
  541. }
  542. ctx.Data["log"] = result
  543. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  544. }
  545. func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error){
  546. task, err := models.GetCloudbrainByJobID(jobID)
  547. if err != nil {
  548. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  549. return nil, nil, err
  550. }
  551. resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
  552. if err != nil {
  553. log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
  554. return nil, nil, err
  555. }
  556. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, 20)
  557. if err != nil {
  558. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  559. return nil, nil, err
  560. }
  561. return resultLogFile, result, err
  562. }
  563. func TrainJobDel(ctx *context.Context) {
  564. var jobID = ctx.Params(":jobid")
  565. task, err := models.GetCloudbrainByJobID(jobID)
  566. if err != nil {
  567. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  568. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  569. return
  570. }
  571. _, err = modelarts.DelTrainJob(jobID)
  572. if err != nil {
  573. log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
  574. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  575. return
  576. }
  577. err = models.DeleteJob(task)
  578. if err != nil {
  579. ctx.ServerError("DeleteJob failed", err)
  580. return
  581. }
  582. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  583. }
  584. func TrainJobStop(ctx *context.Context) {
  585. var jobID = ctx.Params(":jobid")
  586. task, err := models.GetCloudbrainByJobID(jobID)
  587. if err != nil {
  588. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  589. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  590. return
  591. }
  592. _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  593. if err != nil {
  594. log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
  595. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  596. return
  597. }
  598. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  599. }