You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

resty.go 46 kB

4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
2 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
2 years ago
2 years ago
2 years ago
2 years ago
4 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
3 years ago
2 years ago
3 years ago
2 years ago
3 years ago
3 years ago
3 years ago
2 years ago

  1. package modelarts
  2. import (
  3. "crypto/tls"
  4. "encoding/json"
  5. "fmt"
  6. "net/http"
  7. "strconv"
  8. "code.gitea.io/gitea/models"
  9. "code.gitea.io/gitea/modules/log"
  10. "code.gitea.io/gitea/modules/setting"
  11. "github.com/go-resty/resty/v2"
  12. )
  13. var (
  14. restyClient *resty.Client
  15. HOST string
  16. TOKEN string
  17. )
  18. const (
  19. methodPassword = "password"
  20. urlGetToken = "/v3/auth/tokens"
  21. urlNotebook = "/demanager/instances"
  22. urlTrainJob = "/training-jobs"
  23. urlResourceSpecs = "/job/resource-specs"
  24. urlTrainJobConfig = "/training-job-configs"
  25. errorCodeExceedLimit = "ModelArts.0118"
  26. //notebook 2.0
  27. urlNotebook2 = "/notebooks"
  28. //error code
  29. modelartsIllegalToken = "ModelArts.6401"
  30. NotebookNotFound = "ModelArts.6404"
  31. NotebookNoPermission = "ModelArts.6407"
  32. NotebookInvalid = "ModelArts.6400"
  33. UnknownErrorPrefix = "UNKNOWN:"
  34. )
  35. func getRestyClient() *resty.Client {
  36. if restyClient == nil {
  37. restyClient = resty.New()
  38. restyClient.SetTLSClientConfig(&tls.Config{InsecureSkipVerify: true})
  39. }
  40. return restyClient
  41. }
  42. func checkSetting() {
  43. if len(HOST) != 0 && len(TOKEN) != 0 && restyClient != nil {
  44. return
  45. }
  46. err := getToken()
  47. if err != nil {
  48. log.Error("getToken failed:%v", err)
  49. }
  50. }
  51. func getToken() error {
  52. HOST = setting.ModelArtsHost
  53. client := getRestyClient()
  54. params := models.GetTokenParams{
  55. Auth: models.Auth{
  56. Identity: models.Identity{
  57. Methods: []string{methodPassword},
  58. Password: models.Password{
  59. User: models.NotebookUser{
  60. Name: setting.ModelArtsUsername,
  61. Password: setting.ModelArtsPassword,
  62. Domain: models.Domain{
  63. Name: setting.ModelArtsDomain,
  64. },
  65. },
  66. },
  67. },
  68. Scope: models.Scope{
  69. Project: models.Project{
  70. Name: setting.ProjectName,
  71. },
  72. },
  73. },
  74. }
  75. res, err := client.R().
  76. SetHeader("Content-Type", "application/json").
  77. SetBody(params).
  78. Post(setting.IamHost + urlGetToken)
  79. if err != nil {
  80. return fmt.Errorf("resty getToken: %v", err)
  81. }
  82. if res.StatusCode() != http.StatusCreated {
  83. return fmt.Errorf("getToken failed:%s", res.String())
  84. }
  85. TOKEN = res.Header().Get("X-Subject-Token")
  86. return nil
  87. }
  88. func CreateJob(createJobParams models.CreateNotebookParams) (*models.CreateNotebookResult, error) {
  89. checkSetting()
  90. client := getRestyClient()
  91. var result models.CreateNotebookResult
  92. retry := 0
  93. sendjob:
  94. res, err := client.R().
  95. SetHeader("Content-Type", "application/json").
  96. SetAuthToken(TOKEN).
  97. SetBody(createJobParams).
  98. SetResult(&result).
  99. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook)
  100. if err != nil {
  101. return nil, fmt.Errorf("resty create notebook: %s", err)
  102. }
  103. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  104. retry++
  105. _ = getToken()
  106. goto sendjob
  107. }
  108. var response models.NotebookResult
  109. err = json.Unmarshal(res.Body(), &response)
  110. if err != nil {
  111. log.Error("json.Unmarshal failed: %s", err.Error())
  112. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  113. }
  114. if len(response.ErrorCode) != 0 {
  115. log.Error("createNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  116. if response.ErrorCode == errorCodeExceedLimit {
  117. response.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
  118. }
  119. return &result, fmt.Errorf("createNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  120. }
  121. return &result, nil
  122. }
  123. func GetJob(jobID string) (*models.GetNotebookResult, error) {
  124. checkSetting()
  125. client := getRestyClient()
  126. var result models.GetNotebookResult
  127. retry := 0
  128. sendjob:
  129. res, err := client.R().
  130. SetHeader("Content-Type", "application/json").
  131. SetAuthToken(TOKEN).
  132. SetResult(&result).
  133. Get(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID)
  134. if err != nil {
  135. return nil, fmt.Errorf("resty GetJob: %v", err)
  136. }
  137. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  138. retry++
  139. _ = getToken()
  140. goto sendjob
  141. }
  142. var response models.NotebookResult
  143. err = json.Unmarshal(res.Body(), &response)
  144. if err != nil {
  145. log.Error("json.Unmarshal failed: %s", err.Error())
  146. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  147. }
  148. if len(response.ErrorCode) != 0 {
  149. log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  150. return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  151. }
  152. return &result, nil
  153. }
  154. func GetNotebook2(jobID string) (*models.GetNotebook2Result, error) {
  155. checkSetting()
  156. client := getRestyClient()
  157. var result models.GetNotebook2Result
  158. retry := 0
  159. sendjob:
  160. res, err := client.R().
  161. SetHeader("Content-Type", "application/json").
  162. SetAuthToken(TOKEN).
  163. SetResult(&result).
  164. Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID)
  165. if err != nil {
  166. return nil, fmt.Errorf("resty GetJob: %v", err)
  167. }
  168. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  169. retry++
  170. _ = getToken()
  171. goto sendjob
  172. }
  173. var response models.NotebookResult
  174. err = json.Unmarshal(res.Body(), &response)
  175. if err != nil {
  176. log.Error("json.Unmarshal failed: %s", err.Error())
  177. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  178. }
  179. if len(response.ErrorCode) != 0 {
  180. log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  181. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  182. retry++
  183. _ = getToken()
  184. goto sendjob
  185. }
  186. return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  187. }
  188. return &result, nil
  189. }
  190. func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
  191. checkSetting()
  192. client := getRestyClient()
  193. var result models.NotebookActionResult
  194. retry := 0
  195. sendjob:
  196. res, err := client.R().
  197. SetHeader("Content-Type", "application/json").
  198. SetBody(param).
  199. SetAuthToken(TOKEN).
  200. SetResult(&result).
  201. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID + "/action")
  202. if err != nil {
  203. return &result, fmt.Errorf("resty StopJob: %v", err)
  204. }
  205. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  206. retry++
  207. _ = getToken()
  208. goto sendjob
  209. }
  210. var response models.NotebookResult
  211. err = json.Unmarshal(res.Body(), &response)
  212. if err != nil {
  213. log.Error("json.Unmarshal failed: %s", err.Error())
  214. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  215. }
  216. if len(response.ErrorCode) != 0 {
  217. log.Error("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  218. return &result, fmt.Errorf("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  219. }
  220. return &result, nil
  221. }
  222. func ManageNotebook2(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
  223. checkSetting()
  224. client := getRestyClient()
  225. var result models.NotebookActionResult
  226. retry := 0
  227. sendjob:
  228. res, err := client.R().
  229. SetHeader("Content-Type", "application/json").
  230. SetAuthToken(TOKEN).
  231. SetResult(&result).
  232. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID + "/" + param.Action + "?duration=" + strconv.Itoa(autoStopDurationMs))
  233. if err != nil {
  234. return &result, fmt.Errorf("resty ManageNotebook2: %v", err)
  235. }
  236. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  237. retry++
  238. _ = getToken()
  239. goto sendjob
  240. }
  241. var response models.NotebookResult
  242. err = json.Unmarshal(res.Body(), &response)
  243. if err != nil {
  244. log.Error("json.Unmarshal failed: %s", err.Error())
  245. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  246. }
  247. if res.StatusCode() == http.StatusBadGateway {
  248. return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  249. }
  250. if len(response.ErrorCode) != 0 {
  251. log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  252. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  253. retry++
  254. _ = getToken()
  255. goto sendjob
  256. }
  257. return &result, fmt.Errorf("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  258. }
  259. return &result, nil
  260. }
  261. func DelNotebook(jobID string) (*models.NotebookDelResult, error) {
  262. checkSetting()
  263. client := getRestyClient()
  264. var result models.NotebookDelResult
  265. retry := 0
  266. sendjob:
  267. res, err := client.R().
  268. SetHeader("Content-Type", "application/json").
  269. SetAuthToken(TOKEN).
  270. SetResult(&result).
  271. Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID)
  272. if err != nil {
  273. return &result, fmt.Errorf("resty DelJob: %v", err)
  274. }
  275. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  276. retry++
  277. _ = getToken()
  278. goto sendjob
  279. }
  280. var response models.NotebookResult
  281. err = json.Unmarshal(res.Body(), &response)
  282. if err != nil {
  283. log.Error("json.Unmarshal failed: %s", err.Error())
  284. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  285. }
  286. if len(response.ErrorCode) != 0 {
  287. log.Error("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  288. return &result, fmt.Errorf("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  289. }
  290. return &result, nil
  291. }
  292. func DelNotebook2(jobID string) (*models.NotebookDelResult, error) {
  293. checkSetting()
  294. client := getRestyClient()
  295. var result models.NotebookDelResult
  296. retry := 0
  297. sendjob:
  298. res, err := client.R().
  299. SetHeader("Content-Type", "application/json").
  300. SetAuthToken(TOKEN).
  301. SetResult(&result).
  302. Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID)
  303. if err != nil {
  304. return &result, fmt.Errorf("resty DelJob: %v", err)
  305. }
  306. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  307. retry++
  308. _ = getToken()
  309. goto sendjob
  310. }
  311. var response models.NotebookResult
  312. err = json.Unmarshal(res.Body(), &response)
  313. if err != nil {
  314. log.Error("json.Unmarshal failed: %s", err.Error())
  315. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  316. }
  317. if len(response.ErrorCode) != 0 {
  318. log.Error("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  319. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  320. retry++
  321. _ = getToken()
  322. goto sendjob
  323. }
  324. return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  325. }
  326. return &result, nil
  327. }
  328. func DelJob(jobID string) (*models.NotebookDelResult, error) {
  329. checkSetting()
  330. client := getRestyClient()
  331. var result models.NotebookDelResult
  332. retry := 0
  333. sendjob:
  334. res, err := client.R().
  335. SetHeader("Content-Type", "application/json").
  336. SetAuthToken(TOKEN).
  337. SetResult(&result).
  338. Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID)
  339. if err != nil {
  340. return &result, fmt.Errorf("resty DelJob: %v", err)
  341. }
  342. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  343. retry++
  344. _ = getToken()
  345. goto sendjob
  346. }
  347. var response models.NotebookResult
  348. err = json.Unmarshal(res.Body(), &response)
  349. if err != nil {
  350. log.Error("json.Unmarshal failed: %s", err.Error())
  351. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  352. }
  353. if len(response.ErrorCode) != 0 {
  354. log.Error("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  355. return &result, fmt.Errorf("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  356. }
  357. return &result, nil
  358. }
  359. func GetJobToken(jobID string) (*models.NotebookGetJobTokenResult, error) {
  360. checkSetting()
  361. client := getRestyClient()
  362. var result models.NotebookGetJobTokenResult
  363. retry := 0
  364. sendjob:
  365. res, err := client.R().
  366. SetHeader("Content-Type", "application/json").
  367. SetAuthToken(TOKEN).
  368. SetResult(&result).
  369. Get(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID + "/token")
  370. if err != nil {
  371. return &result, fmt.Errorf("resty GetJobToken: %v", err)
  372. }
  373. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  374. retry++
  375. _ = getToken()
  376. goto sendjob
  377. }
  378. var response models.NotebookResult
  379. err = json.Unmarshal(res.Body(), &response)
  380. if err != nil {
  381. log.Error("json.Unmarshal failed: %s", err.Error())
  382. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  383. }
  384. if len(response.ErrorCode) != 0 {
  385. log.Error("GetJobToken failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  386. return &result, fmt.Errorf("GetJobToken failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  387. }
  388. return &result, nil
  389. }
  390. func createTrainJobUserImage(createJobParams models.CreateUserImageTrainJobParams) (*models.CreateTrainJobResult, error) {
  391. checkSetting()
  392. client := getRestyClient()
  393. var result models.CreateTrainJobResult
  394. retry := 0
  395. sendjob:
  396. res, err := client.R().
  397. SetHeader("Content-Type", "application/json").
  398. SetAuthToken(TOKEN).
  399. SetBody(createJobParams).
  400. SetResult(&result).
  401. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
  402. if err != nil {
  403. return nil, fmt.Errorf("resty create train-job: %s", err)
  404. }
  405. req, _ := json.Marshal(createJobParams)
  406. log.Info("%s", req)
  407. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  408. retry++
  409. _ = getToken()
  410. goto sendjob
  411. }
  412. if res.StatusCode() != http.StatusOK {
  413. var temp models.ErrorResult
  414. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  415. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  416. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  417. }
  418. log.Error("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  419. bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
  420. dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
  421. if temp.ErrorMsg == bootFileErrorMsg {
  422. log.Error("启动文件错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  423. return &result, fmt.Errorf("启动文件错误!")
  424. }
  425. if temp.ErrorMsg == dataSetErrorMsg {
  426. log.Error("数据集错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  427. return &result, fmt.Errorf("数据集错误!")
  428. }
  429. if res.StatusCode() == http.StatusBadGateway {
  430. return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  431. } else {
  432. return &result, fmt.Errorf("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  433. }
  434. }
  435. if !result.IsSuccess {
  436. log.Error("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  437. return &result, fmt.Errorf("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  438. }
  439. return &result, nil
  440. }
  441. func createTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) {
  442. checkSetting()
  443. client := getRestyClient()
  444. var result models.CreateTrainJobResult
  445. retry := 0
  446. sendjob:
  447. res, err := client.R().
  448. SetHeader("Content-Type", "application/json").
  449. SetAuthToken(TOKEN).
  450. SetBody(createJobParams).
  451. SetResult(&result).
  452. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
  453. if err != nil {
  454. return nil, fmt.Errorf("resty create train-job: %s", err)
  455. }
  456. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  457. retry++
  458. _ = getToken()
  459. goto sendjob
  460. }
  461. if res.StatusCode() != http.StatusOK {
  462. var temp models.ErrorResult
  463. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  464. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  465. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  466. }
  467. log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  468. bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
  469. dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
  470. if temp.ErrorMsg == bootFileErrorMsg {
  471. log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  472. return &result, fmt.Errorf("启动文件错误!")
  473. }
  474. if temp.ErrorMsg == dataSetErrorMsg {
  475. log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  476. return &result, fmt.Errorf("数据集错误!")
  477. }
  478. if res.StatusCode() == http.StatusBadGateway {
  479. return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  480. } else {
  481. return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  482. }
  483. }
  484. if !result.IsSuccess {
  485. log.Error("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  486. return &result, fmt.Errorf("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  487. }
  488. return &result, nil
  489. }
  490. func createTrainJobVersion(createJobVersionParams models.CreateTrainJobVersionParams, jobID string) (*models.CreateTrainJobResult, error) {
  491. checkSetting()
  492. client := getRestyClient()
  493. var result models.CreateTrainJobResult
  494. retry := 0
  495. sendjob:
  496. res, err := client.R().
  497. SetHeader("Content-Type", "application/json").
  498. SetAuthToken(TOKEN).
  499. SetBody(createJobVersionParams).
  500. SetResult(&result).
  501. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")
  502. if err != nil {
  503. return nil, fmt.Errorf("resty create train-job version: %s", err)
  504. }
  505. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  506. retry++
  507. _ = getToken()
  508. goto sendjob
  509. }
  510. if res.StatusCode() != http.StatusOK {
  511. var temp models.ErrorResult
  512. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  513. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  514. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  515. }
  516. log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  517. bootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
  518. dataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
  519. if temp.ErrorMsg == bootFileErrorMsg {
  520. log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  521. return &result, fmt.Errorf("启动文件错误!")
  522. }
  523. if temp.ErrorMsg == dataSetErrorMsg {
  524. log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  525. return &result, fmt.Errorf("数据集错误!")
  526. }
  527. if res.StatusCode() == http.StatusBadGateway {
  528. return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  529. } else {
  530. return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  531. }
  532. }
  533. if !result.IsSuccess {
  534. log.Error("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  535. return &result, fmt.Errorf("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  536. }
  537. return &result, nil
  538. }
  539. func createTrainJobVersionUserImage(createJobVersionParams models.CreateTrainJobVersionUserImageParams, jobID string) (*models.CreateTrainJobResult, error) {
  540. checkSetting()
  541. client := getRestyClient()
  542. var result models.CreateTrainJobResult
  543. retry := 0
  544. sendjob:
  545. res, err := client.R().
  546. SetHeader("Content-Type", "application/json").
  547. SetAuthToken(TOKEN).
  548. SetBody(createJobVersionParams).
  549. SetResult(&result).
  550. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")
  551. if err != nil {
  552. return nil, fmt.Errorf("resty create train-job version: %s", err)
  553. }
  554. req, _ := json.Marshal(createJobVersionParams)
  555. log.Info("%s", req)
  556. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  557. retry++
  558. _ = getToken()
  559. goto sendjob
  560. }
  561. if res.StatusCode() != http.StatusOK {
  562. var temp models.ErrorResult
  563. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  564. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  565. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  566. }
  567. BootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
  568. DataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
  569. if temp.ErrorMsg == BootFileErrorMsg {
  570. log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  571. return &result, fmt.Errorf("启动文件错误!")
  572. }
  573. if temp.ErrorMsg == DataSetErrorMsg {
  574. log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  575. return &result, fmt.Errorf("数据集错误!")
  576. }
  577. return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  578. }
  579. if !result.IsSuccess {
  580. log.Error("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  581. return &result, fmt.Errorf("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  582. }
  583. return &result, nil
  584. }
  585. func GetResourceSpecs() (*models.GetResourceSpecsResult, error) {
  586. checkSetting()
  587. client := getRestyClient()
  588. var result models.GetResourceSpecsResult
  589. retry := 0
  590. sendjob:
  591. res, err := client.R().
  592. SetHeader("Content-Type", "application/json").
  593. SetAuthToken(TOKEN).
  594. SetResult(&result).
  595. Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs)
  596. if err != nil {
  597. return nil, fmt.Errorf("resty GetResourceSpecs: %v", err)
  598. }
  599. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  600. retry++
  601. _ = getToken()
  602. goto sendjob
  603. }
  604. if res.StatusCode() != http.StatusOK {
  605. var temp models.ErrorResult
  606. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  607. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  608. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  609. }
  610. log.Error("GetResourceSpecs failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  611. return &result, fmt.Errorf("GetResourceSpecs failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  612. }
  613. if !result.IsSuccess {
  614. log.Error("GetResourceSpecs failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  615. return &result, fmt.Errorf("GetResourceSpecs failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  616. }
  617. return &result, nil
  618. }
  619. func CreateTrainJobConfig(req models.CreateConfigParams) (*models.CreateTrainJobConfigResult, error) {
  620. checkSetting()
  621. client := getRestyClient()
  622. var result models.CreateTrainJobConfigResult
  623. retry := 0
  624. sendjob:
  625. res, err := client.R().
  626. SetHeader("Content-Type", "application/json").
  627. SetAuthToken(TOKEN).
  628. SetBody(req).
  629. SetResult(&result).
  630. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig)
  631. if err != nil {
  632. return nil, fmt.Errorf("resty CreateTrainJobConfig: %s", err)
  633. }
  634. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  635. retry++
  636. _ = getToken()
  637. goto sendjob
  638. }
  639. if res.StatusCode() != http.StatusOK {
  640. var temp models.ErrorResult
  641. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  642. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  643. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  644. }
  645. log.Error("CreateTrainJobConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  646. return &result, fmt.Errorf("CreateTrainJobConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  647. }
  648. if !result.IsSuccess {
  649. log.Error("CreateTrainJobConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  650. return &result, fmt.Errorf("CreateTrainJobConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  651. }
  652. return &result, nil
  653. }
  654. func GetConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
  655. checkSetting()
  656. client := getRestyClient()
  657. var result models.GetConfigListResult
  658. retry := 0
  659. sendjob:
  660. res, err := client.R().
  661. SetQueryParams(map[string]string{
  662. "per_page": strconv.Itoa(perPage),
  663. "page": strconv.Itoa(page),
  664. "sortBy": sortBy,
  665. "order": order,
  666. "search_content": searchContent,
  667. "config_type": configType,
  668. }).
  669. SetAuthToken(TOKEN).
  670. SetResult(&result).
  671. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig)
  672. if err != nil {
  673. return nil, fmt.Errorf("resty GetConfigList: %v", err)
  674. }
  675. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  676. retry++
  677. _ = getToken()
  678. goto sendjob
  679. }
  680. if res.StatusCode() != http.StatusOK {
  681. var temp models.ErrorResult
  682. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  683. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  684. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  685. }
  686. log.Error("GetConfigList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  687. return &result, fmt.Errorf("获取参数配置列表失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  688. }
  689. if !result.IsSuccess {
  690. log.Error("GetConfigList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  691. return &result, fmt.Errorf("获取参数配置列表失败(%s): %s", result.ErrorCode, result.ErrorMsg)
  692. }
  693. return &result, nil
  694. }
  695. func GetParaConfig(configName, configType string) (models.GetConfigResult, error) {
  696. checkSetting()
  697. client := getRestyClient()
  698. var result models.GetConfigResult
  699. retry := 0
  700. sendjob:
  701. res, err := client.R().
  702. SetQueryParams(map[string]string{
  703. "config_type": configType,
  704. }).
  705. SetAuthToken(TOKEN).
  706. SetResult(&result).
  707. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig + "/" + configName)
  708. if err != nil {
  709. return result, fmt.Errorf("resty GetParaConfig: %v", err)
  710. }
  711. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  712. retry++
  713. _ = getToken()
  714. goto sendjob
  715. }
  716. if res.StatusCode() != http.StatusOK {
  717. var temp models.ErrorResult
  718. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  719. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  720. return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  721. }
  722. log.Error("GetParaConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  723. return result, fmt.Errorf("获取参数配置详情失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  724. }
  725. if !result.IsSuccess {
  726. log.Error("GetParaConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  727. return result, fmt.Errorf("获取参数配置详情失败(%s): %s", result.ErrorCode, result.ErrorMsg)
  728. }
  729. return result, nil
  730. }
  731. func GetTrainJob(jobID, versionID string) (*models.GetTrainJobResult, error) {
  732. checkSetting()
  733. client := getRestyClient()
  734. var result models.GetTrainJobResult
  735. retry := 0
  736. sendjob:
  737. res, err := client.R().
  738. SetAuthToken(TOKEN).
  739. SetResult(&result).
  740. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID)
  741. if err != nil {
  742. return nil, fmt.Errorf("resty GetTrainJob: %v", err)
  743. }
  744. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  745. retry++
  746. _ = getToken()
  747. goto sendjob
  748. }
  749. if res.StatusCode() != http.StatusOK {
  750. var temp models.ErrorResult
  751. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  752. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  753. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  754. }
  755. log.Error("GetTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  756. return &result, fmt.Errorf("获取作业详情失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  757. }
  758. if !result.IsSuccess {
  759. log.Error("GetTrainJob(%s) failed", jobID)
  760. return &result, fmt.Errorf("获取作业详情失败")
  761. }
  762. return &result, nil
  763. }
  764. func GetTrainJobLog(jobID, versionID, baseLine, logFile, order string, lines int) (*models.GetTrainJobLogResult, error) {
  765. checkSetting()
  766. client := getRestyClient()
  767. var result models.GetTrainJobLogResult
  768. retry := 0
  769. sendjob:
  770. res, err := client.R().
  771. SetQueryParams(map[string]string{
  772. "base_line": baseLine,
  773. "lines": strconv.Itoa(lines),
  774. "log_file": logFile,
  775. "order": order,
  776. }).
  777. SetAuthToken(TOKEN).
  778. SetResult(&result).
  779. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/aom-log")
  780. if err != nil {
  781. return nil, fmt.Errorf("resty GetTrainJobLog: %v", err)
  782. }
  783. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  784. retry++
  785. _ = getToken()
  786. goto sendjob
  787. }
  788. if res.StatusCode() != http.StatusOK {
  789. var temp models.ErrorResult
  790. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  791. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  792. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  793. }
  794. log.Error("GetTrainJobLog failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  795. return &result, fmt.Errorf("获取作业日志失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  796. }
  797. if !result.IsSuccess {
  798. log.Error("GetTrainJobLog(%s) failed", jobID)
  799. return &result, fmt.Errorf("获取作业日志失败:%s", result.ErrorMsg)
  800. }
  801. return &result, nil
  802. }
  803. func GetTrainJobLogFileNames(jobID, versionID string) (*models.GetTrainJobLogFileNamesResult, error) {
  804. checkSetting()
  805. client := getRestyClient()
  806. var result models.GetTrainJobLogFileNamesResult
  807. retry := 0
  808. sendjob:
  809. res, err := client.R().
  810. SetAuthToken(TOKEN).
  811. SetResult(&result).
  812. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/log/file-names")
  813. if err != nil {
  814. return nil, fmt.Errorf("resty GetTrainJobLogFileNames: %v", err)
  815. }
  816. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  817. retry++
  818. _ = getToken()
  819. goto sendjob
  820. }
  821. if res.StatusCode() != http.StatusOK {
  822. var temp models.ErrorResult
  823. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  824. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  825. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  826. }
  827. log.Error("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  828. return &result, fmt.Errorf("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  829. }
  830. if !result.IsSuccess {
  831. log.Error("GetTrainJobLogFileNames(%s) failed", jobID)
  832. return &result, fmt.Errorf("获取作业日志文件失败:%s", result.ErrorMsg)
  833. }
  834. return &result, nil
  835. }
  836. func DelTrainJob(jobID string) (*models.TrainJobResult, error) {
  837. checkSetting()
  838. client := getRestyClient()
  839. var result models.TrainJobResult
  840. retry := 0
  841. sendjob:
  842. res, err := client.R().
  843. SetAuthToken(TOKEN).
  844. SetResult(&result).
  845. Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID)
  846. if err != nil {
  847. return &result, fmt.Errorf("resty DelTrainJob: %v", err)
  848. }
  849. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  850. retry++
  851. _ = getToken()
  852. goto sendjob
  853. }
  854. if res.StatusCode() != http.StatusOK {
  855. var temp models.ErrorResult
  856. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  857. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  858. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  859. }
  860. log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  861. return &result, fmt.Errorf("删除训练作业失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  862. }
  863. if !result.IsSuccess {
  864. log.Error("DelTrainJob(%s) failed", jobID)
  865. return &result, fmt.Errorf("删除训练作业失败:%s", result.ErrorMsg)
  866. }
  867. return &result, nil
  868. }
  869. func StopTrainJob(jobID, versionID string) (*models.TrainJobResult, error) {
  870. checkSetting()
  871. client := getRestyClient()
  872. var result models.TrainJobResult
  873. retry := 0
  874. sendjob:
  875. res, err := client.R().
  876. SetAuthToken(TOKEN).
  877. SetResult(&result).
  878. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/stop")
  879. if err != nil {
  880. return &result, fmt.Errorf("resty StopTrainJob: %v", err)
  881. }
  882. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  883. retry++
  884. _ = getToken()
  885. goto sendjob
  886. }
  887. if res.StatusCode() != http.StatusOK {
  888. var temp models.ErrorResult
  889. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  890. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  891. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  892. }
  893. log.Error("StopTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  894. return &result, fmt.Errorf("停止训练作业失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  895. }
  896. if !result.IsSuccess {
  897. log.Error("StopTrainJob(%s) failed", jobID)
  898. return &result, fmt.Errorf("停止训练作业失败:%s", result.ErrorMsg)
  899. }
  900. return &result, nil
  901. }
  902. func DelTrainJobVersion(jobID string, versionID string) (*models.TrainJobResult, error) {
  903. checkSetting()
  904. client := getRestyClient()
  905. var result models.TrainJobResult
  906. retry := 0
  907. sendjob:
  908. res, err := client.R().
  909. SetAuthToken(TOKEN).
  910. SetResult(&result).
  911. Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID)
  912. if err != nil {
  913. return &result, fmt.Errorf("resty DelTrainJobVersion: %v", err)
  914. }
  915. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  916. retry++
  917. _ = getToken()
  918. goto sendjob
  919. }
  920. if res.StatusCode() != http.StatusOK {
  921. var temp models.ErrorResult
  922. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  923. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  924. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  925. }
  926. log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  927. return &result, fmt.Errorf("删除训练作业版本失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  928. }
  929. if !result.IsSuccess {
  930. log.Error("DelTrainJob(%s) failed", jobID)
  931. return &result, fmt.Errorf("删除训练作业版本失败:%s", result.ErrorMsg)
  932. }
  933. return &result, nil
  934. }
  935. func createInferenceJob(createJobParams models.CreateInferenceJobParams) (*models.CreateTrainJobResult, error) {
  936. checkSetting()
  937. client := getRestyClient()
  938. var result models.CreateTrainJobResult
  939. retry := 0
  940. sendjob:
  941. res, err := client.R().
  942. SetHeader("Content-Type", "application/json").
  943. SetAuthToken(TOKEN).
  944. SetBody(createJobParams).
  945. SetResult(&result).
  946. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
  947. if err != nil {
  948. return nil, fmt.Errorf("resty create inference-job: %s", err)
  949. }
  950. req, _ := json.Marshal(createJobParams)
  951. log.Info("%s", req)
  952. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  953. retry++
  954. _ = getToken()
  955. goto sendjob
  956. }
  957. if res.StatusCode() != http.StatusOK {
  958. var temp models.ErrorResult
  959. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  960. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  961. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  962. }
  963. log.Error("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  964. BootFileErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.BootFileUrl + "'."
  965. DataSetErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.DataUrl + "'."
  966. if temp.ErrorMsg == BootFileErrorMsg {
  967. log.Error("启动文件错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  968. return &result, fmt.Errorf("启动文件错误!")
  969. }
  970. if temp.ErrorMsg == DataSetErrorMsg {
  971. log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  972. return &result, fmt.Errorf("数据集错误!")
  973. }
  974. if res.StatusCode() == http.StatusBadGateway {
  975. return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  976. } else {
  977. return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  978. }
  979. }
  980. if !result.IsSuccess {
  981. log.Error("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  982. return &result, fmt.Errorf("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  983. }
  984. return &result, nil
  985. }
  986. func createInferenceJobUserImage(createJobParams models.CreateInfUserImageParams) (*models.CreateTrainJobResult, error) {
  987. checkSetting()
  988. client := getRestyClient()
  989. var result models.CreateTrainJobResult
  990. retry := 0
  991. sendjob:
  992. res, err := client.R().
  993. SetHeader("Content-Type", "application/json").
  994. SetAuthToken(TOKEN).
  995. SetBody(createJobParams).
  996. SetResult(&result).
  997. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
  998. if err != nil {
  999. return nil, fmt.Errorf("resty create train-job: %s", err)
  1000. }
  1001. req, _ := json.Marshal(createJobParams)
  1002. log.Info("%s", req)
  1003. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  1004. retry++
  1005. _ = getToken()
  1006. goto sendjob
  1007. }
  1008. if res.StatusCode() != http.StatusOK {
  1009. var temp models.ErrorResult
  1010. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  1011. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1012. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1013. }
  1014. log.Error("createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1015. bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
  1016. dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
  1017. if temp.ErrorMsg == bootFileErrorMsg {
  1018. log.Error("启动文件错误!createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1019. return &result, fmt.Errorf("启动文件错误!")
  1020. }
  1021. if temp.ErrorMsg == dataSetErrorMsg {
  1022. log.Error("数据集错误!createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1023. return &result, fmt.Errorf("数据集错误!")
  1024. }
  1025. if res.StatusCode() == http.StatusBadGateway {
  1026. return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1027. } else {
  1028. return &result, fmt.Errorf("createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1029. }
  1030. }
  1031. if !result.IsSuccess {
  1032. log.Error("createInferenceJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  1033. return &result, fmt.Errorf("createInferenceJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  1034. }
  1035. return &result, nil
  1036. }
  1037. func createNotebook2(createJobParams models.CreateNotebook2Params) (*models.CreateNotebookResult, error) {
  1038. checkSetting()
  1039. client := getRestyClient()
  1040. var result models.CreateNotebookResult
  1041. retry := 0
  1042. sendjob:
  1043. res, err := client.R().
  1044. SetHeader("Content-Type", "application/json").
  1045. SetAuthToken(TOKEN).
  1046. SetBody(createJobParams).
  1047. SetResult(&result).
  1048. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2)
  1049. if err != nil {
  1050. return nil, fmt.Errorf("resty create notebook2: %s", err)
  1051. }
  1052. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  1053. retry++
  1054. _ = getToken()
  1055. goto sendjob
  1056. }
  1057. var response models.NotebookResult
  1058. err = json.Unmarshal(res.Body(), &response)
  1059. if err != nil {
  1060. log.Error("json.Unmarshal failed: %s", err.Error())
  1061. return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
  1062. }
  1063. if res.StatusCode() == http.StatusBadGateway {
  1064. return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  1065. }
  1066. if len(response.ErrorCode) != 0 {
  1067. log.Error("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  1068. if response.ErrorCode == errorCodeExceedLimit {
  1069. response.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
  1070. }
  1071. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  1072. retry++
  1073. _ = getToken()
  1074. goto sendjob
  1075. }
  1076. return &result, fmt.Errorf("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  1077. }
  1078. return &result, nil
  1079. }
  1080. func GetTrainJobMetricStatistic(jobID, versionID, podName string) (*models.GetTrainJobMetricStatisticResult, error) {
  1081. checkSetting()
  1082. client := getRestyClient()
  1083. var result models.GetTrainJobMetricStatisticResult
  1084. retry := 0
  1085. sendjob:
  1086. res, err := client.R().
  1087. SetAuthToken(TOKEN).
  1088. SetResult(&result).
  1089. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/pod/" + podName + "/metric-statistic?statistic_type=each")
  1090. if err != nil {
  1091. return nil, fmt.Errorf("resty GetTrainJobMetricStatistic: %v", err)
  1092. }
  1093. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  1094. retry++
  1095. _ = getToken()
  1096. goto sendjob
  1097. }
  1098. if res.StatusCode() != http.StatusOK {
  1099. var temp models.ErrorResult
  1100. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  1101. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1102. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1103. }
  1104. log.Error("GetTrainJobMetricStatistic failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1105. return &result, fmt.Errorf("GetTrainJobMetricStatistic failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1106. }
  1107. if !result.IsSuccess {
  1108. log.Error("GetTrainJobMetricStatistic(%s) failed", jobID)
  1109. return &result, fmt.Errorf("获取任务资源占用情况失败:%s", result.ErrorMsg)
  1110. }
  1111. return &result, nil
  1112. }
  1113. func GetTrainJobList(perPage, page int, sortBy, order, searchContent string) (*models.GetTrainJobListResult, error) {
  1114. checkSetting()
  1115. client := getRestyClient()
  1116. var result models.GetTrainJobListResult
  1117. retry := 0
  1118. sendjob:
  1119. res, err := client.R().
  1120. SetQueryParams(map[string]string{
  1121. "per_page": strconv.Itoa(perPage),
  1122. "page": strconv.Itoa(page),
  1123. "sortBy": sortBy,
  1124. "order": order,
  1125. "search_content": searchContent,
  1126. }).
  1127. SetAuthToken(TOKEN).
  1128. SetResult(&result).
  1129. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
  1130. if err != nil {
  1131. return nil, fmt.Errorf("resty GetTrainJobList: %v", err)
  1132. }
  1133. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  1134. retry++
  1135. _ = getToken()
  1136. goto sendjob
  1137. }
  1138. if res.StatusCode() != http.StatusOK {
  1139. var temp models.ErrorResult
  1140. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  1141. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1142. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1143. }
  1144. log.Error("GetTrainJobList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1145. return &result, fmt.Errorf(temp.ErrorMsg)
  1146. }
  1147. if !result.IsSuccess {
  1148. log.Error("GetTrainJobList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  1149. return &result, fmt.Errorf(result.ErrorMsg)
  1150. }
  1151. return &result, nil
  1152. }
  1153. func GetTrainJobVersionList(perPage, page int, jobID string) (*models.GetTrainJobVersionListResult, error) {
  1154. checkSetting()
  1155. client := getRestyClient()
  1156. var result models.GetTrainJobVersionListResult
  1157. retry := 0
  1158. sendjob:
  1159. res, err := client.R().
  1160. SetQueryParams(map[string]string{
  1161. "per_page": strconv.Itoa(perPage),
  1162. "page": strconv.Itoa(page),
  1163. }).
  1164. SetAuthToken(TOKEN).
  1165. SetResult(&result).
  1166. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")
  1167. if err != nil {
  1168. return nil, fmt.Errorf("resty GetTrainJobVersionList: %v", err)
  1169. }
  1170. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  1171. retry++
  1172. _ = getToken()
  1173. goto sendjob
  1174. }
  1175. if res.StatusCode() != http.StatusOK {
  1176. var temp models.ErrorResult
  1177. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  1178. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1179. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1180. }
  1181. log.Error("GetTrainJobVersionList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1182. return &result, fmt.Errorf(temp.ErrorMsg)
  1183. }
  1184. if !result.IsSuccess {
  1185. log.Error("GetTrainJobVersionList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  1186. return &result, fmt.Errorf(result.ErrorMsg)
  1187. }
  1188. return &result, nil
  1189. }
  1190. func GetNotebookList(limit, offset int, sortBy, order, searchContent string) (*models.GetNotebookListResult, error) {
  1191. checkSetting()
  1192. client := getRestyClient()
  1193. var result models.GetNotebookListResult
  1194. retry := 0
  1195. sendjob:
  1196. res, err := client.R().
  1197. SetQueryParams(map[string]string{
  1198. "limit": strconv.Itoa(limit),
  1199. "offset": strconv.Itoa(offset),
  1200. "name": searchContent,
  1201. "sort_key": sortBy,
  1202. "sort_dir": order,
  1203. }).
  1204. SetAuthToken(TOKEN).
  1205. SetResult(&result).
  1206. Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2)
  1207. if err != nil {
  1208. return nil, fmt.Errorf("resty GetNotebookList: %v", err)
  1209. }
  1210. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  1211. retry++
  1212. _ = getToken()
  1213. goto sendjob
  1214. }
  1215. if res.StatusCode() != http.StatusOK {
  1216. var temp models.ErrorResult
  1217. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  1218. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1219. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  1220. }
  1221. log.Error("GetNotebookList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  1222. return &result, fmt.Errorf(temp.ErrorMsg)
  1223. }
  1224. return &result, nil
  1225. }