You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

resty.go 34 kB

4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago

  1. package modelarts
  2. import (
  3. "crypto/tls"
  4. "encoding/json"
  5. "fmt"
  6. "net/http"
  7. "strconv"
  8. "code.gitea.io/gitea/models"
  9. "code.gitea.io/gitea/modules/log"
  10. "code.gitea.io/gitea/modules/setting"
  11. "github.com/go-resty/resty/v2"
  12. )
  13. var (
  14. restyClient *resty.Client
  15. HOST string
  16. TOKEN string
  17. )
  18. const (
  19. methodPassword = "password"
  20. urlGetToken = "/v3/auth/tokens"
  21. urlNotebook = "/demanager/instances"
  22. urlTrainJob = "/training-jobs"
  23. urlResourceSpecs = "/job/resource-specs"
  24. urlTrainJobConfig = "/training-job-configs"
  25. errorCodeExceedLimit = "ModelArts.0118"
  26. //notebook 2.0
  27. urlNotebook2 = "/notebooks"
  28. //error code
  29. modelartsIllegalToken = "ModelArts.6401"
  30. NotebookNotFound = "ModelArts.6404"
  31. NotebookNoPermission = "ModelArts.6407"
  32. NotebookInvalid = "ModelArts.6400"
  33. )
  34. func getRestyClient() *resty.Client {
  35. if restyClient == nil {
  36. restyClient = resty.New()
  37. restyClient.SetTLSClientConfig(&tls.Config{InsecureSkipVerify: true})
  38. }
  39. return restyClient
  40. }
  41. func checkSetting() {
  42. if len(HOST) != 0 && len(TOKEN) != 0 && restyClient != nil {
  43. return
  44. }
  45. err := getToken()
  46. if err != nil {
  47. log.Error("getToken failed:%v", err)
  48. }
  49. }
  50. func getToken() error {
  51. HOST = setting.ModelArtsHost
  52. client := getRestyClient()
  53. params := models.GetTokenParams{
  54. Auth: models.Auth{
  55. Identity: models.Identity{
  56. Methods: []string{methodPassword},
  57. Password: models.Password{
  58. User: models.NotebookUser{
  59. Name: setting.ModelArtsUsername,
  60. Password: setting.ModelArtsPassword,
  61. Domain: models.Domain{
  62. Name: setting.ModelArtsDomain,
  63. },
  64. },
  65. },
  66. },
  67. Scope: models.Scope{
  68. Project: models.Project{
  69. Name: setting.ProjectName,
  70. },
  71. },
  72. },
  73. }
  74. res, err := client.R().
  75. SetHeader("Content-Type", "application/json").
  76. SetBody(params).
  77. Post(setting.IamHost + urlGetToken)
  78. if err != nil {
  79. return fmt.Errorf("resty getToken: %v", err)
  80. }
  81. if res.StatusCode() != http.StatusCreated {
  82. return fmt.Errorf("getToken failed:%s", res.String())
  83. }
  84. TOKEN = res.Header().Get("X-Subject-Token")
  85. return nil
  86. }
  87. func CreateJob(createJobParams models.CreateNotebookParams) (*models.CreateNotebookResult, error) {
  88. checkSetting()
  89. client := getRestyClient()
  90. var result models.CreateNotebookResult
  91. retry := 0
  92. sendjob:
  93. res, err := client.R().
  94. SetHeader("Content-Type", "application/json").
  95. SetAuthToken(TOKEN).
  96. SetBody(createJobParams).
  97. SetResult(&result).
  98. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook)
  99. if err != nil {
  100. return nil, fmt.Errorf("resty create notebook: %s", err)
  101. }
  102. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  103. retry++
  104. _ = getToken()
  105. goto sendjob
  106. }
  107. var response models.NotebookResult
  108. err = json.Unmarshal(res.Body(), &response)
  109. if err != nil {
  110. log.Error("json.Unmarshal failed: %s", err.Error())
  111. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  112. }
  113. if len(response.ErrorCode) != 0 {
  114. log.Error("createNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  115. if response.ErrorCode == errorCodeExceedLimit {
  116. response.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
  117. }
  118. return &result, fmt.Errorf("createNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  119. }
  120. return &result, nil
  121. }
  122. func GetJob(jobID string) (*models.GetNotebookResult, error) {
  123. checkSetting()
  124. client := getRestyClient()
  125. var result models.GetNotebookResult
  126. retry := 0
  127. sendjob:
  128. res, err := client.R().
  129. SetHeader("Content-Type", "application/json").
  130. SetAuthToken(TOKEN).
  131. SetResult(&result).
  132. Get(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID)
  133. if err != nil {
  134. return nil, fmt.Errorf("resty GetJob: %v", err)
  135. }
  136. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  137. retry++
  138. _ = getToken()
  139. goto sendjob
  140. }
  141. var response models.NotebookResult
  142. err = json.Unmarshal(res.Body(), &response)
  143. if err != nil {
  144. log.Error("json.Unmarshal failed: %s", err.Error())
  145. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  146. }
  147. if len(response.ErrorCode) != 0 {
  148. log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  149. return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  150. }
  151. return &result, nil
  152. }
  153. func GetNotebook2(jobID string) (*models.GetNotebook2Result, error) {
  154. checkSetting()
  155. client := getRestyClient()
  156. var result models.GetNotebook2Result
  157. retry := 0
  158. sendjob:
  159. res, err := client.R().
  160. SetHeader("Content-Type", "application/json").
  161. SetAuthToken(TOKEN).
  162. SetResult(&result).
  163. Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID)
  164. if err != nil {
  165. return nil, fmt.Errorf("resty GetJob: %v", err)
  166. }
  167. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  168. retry++
  169. _ = getToken()
  170. goto sendjob
  171. }
  172. var response models.NotebookResult
  173. err = json.Unmarshal(res.Body(), &response)
  174. if err != nil {
  175. log.Error("json.Unmarshal failed: %s", err.Error())
  176. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  177. }
  178. if len(response.ErrorCode) != 0 {
  179. log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  180. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  181. retry++
  182. _ = getToken()
  183. goto sendjob
  184. }
  185. return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  186. }
  187. return &result, nil
  188. }
  189. func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
  190. checkSetting()
  191. client := getRestyClient()
  192. var result models.NotebookActionResult
  193. retry := 0
  194. sendjob:
  195. res, err := client.R().
  196. SetHeader("Content-Type", "application/json").
  197. SetBody(param).
  198. SetAuthToken(TOKEN).
  199. SetResult(&result).
  200. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID + "/action")
  201. if err != nil {
  202. return &result, fmt.Errorf("resty StopJob: %v", err)
  203. }
  204. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  205. retry++
  206. _ = getToken()
  207. goto sendjob
  208. }
  209. var response models.NotebookResult
  210. err = json.Unmarshal(res.Body(), &response)
  211. if err != nil {
  212. log.Error("json.Unmarshal failed: %s", err.Error())
  213. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  214. }
  215. if len(response.ErrorCode) != 0 {
  216. log.Error("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  217. return &result, fmt.Errorf("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  218. }
  219. return &result, nil
  220. }
  221. func ManageNotebook2(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
  222. checkSetting()
  223. client := getRestyClient()
  224. var result models.NotebookActionResult
  225. retry := 0
  226. sendjob:
  227. res, err := client.R().
  228. SetHeader("Content-Type", "application/json").
  229. SetAuthToken(TOKEN).
  230. SetResult(&result).
  231. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID + "/" + param.Action + "?duration=" + strconv.Itoa(autoStopDurationMs))
  232. if err != nil {
  233. return &result, fmt.Errorf("resty ManageNotebook2: %v", err)
  234. }
  235. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  236. retry++
  237. _ = getToken()
  238. goto sendjob
  239. }
  240. var response models.NotebookResult
  241. err = json.Unmarshal(res.Body(), &response)
  242. if err != nil {
  243. log.Error("json.Unmarshal failed: %s", err.Error())
  244. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  245. }
  246. if len(response.ErrorCode) != 0 {
  247. log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  248. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  249. retry++
  250. _ = getToken()
  251. goto sendjob
  252. }
  253. return &result, fmt.Errorf("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  254. }
  255. return &result, nil
  256. }
  257. func DelNotebook(jobID string) (*models.NotebookDelResult, error) {
  258. checkSetting()
  259. client := getRestyClient()
  260. var result models.NotebookDelResult
  261. retry := 0
  262. sendjob:
  263. res, err := client.R().
  264. SetHeader("Content-Type", "application/json").
  265. SetAuthToken(TOKEN).
  266. SetResult(&result).
  267. Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID)
  268. if err != nil {
  269. return &result, fmt.Errorf("resty DelJob: %v", err)
  270. }
  271. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  272. retry++
  273. _ = getToken()
  274. goto sendjob
  275. }
  276. var response models.NotebookResult
  277. err = json.Unmarshal(res.Body(), &response)
  278. if err != nil {
  279. log.Error("json.Unmarshal failed: %s", err.Error())
  280. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  281. }
  282. if len(response.ErrorCode) != 0 {
  283. log.Error("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  284. return &result, fmt.Errorf("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  285. }
  286. return &result, nil
  287. }
  288. func DelNotebook2(jobID string) (*models.NotebookDelResult, error) {
  289. checkSetting()
  290. client := getRestyClient()
  291. var result models.NotebookDelResult
  292. retry := 0
  293. sendjob:
  294. res, err := client.R().
  295. SetHeader("Content-Type", "application/json").
  296. SetAuthToken(TOKEN).
  297. SetResult(&result).
  298. Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID)
  299. if err != nil {
  300. return &result, fmt.Errorf("resty DelJob: %v", err)
  301. }
  302. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  303. retry++
  304. _ = getToken()
  305. goto sendjob
  306. }
  307. var response models.NotebookResult
  308. err = json.Unmarshal(res.Body(), &response)
  309. if err != nil {
  310. log.Error("json.Unmarshal failed: %s", err.Error())
  311. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  312. }
  313. if len(response.ErrorCode) != 0 {
  314. log.Error("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  315. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  316. retry++
  317. _ = getToken()
  318. goto sendjob
  319. }
  320. return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  321. }
  322. return &result, nil
  323. }
  324. func DelJob(jobID string) (*models.NotebookDelResult, error) {
  325. checkSetting()
  326. client := getRestyClient()
  327. var result models.NotebookDelResult
  328. retry := 0
  329. sendjob:
  330. res, err := client.R().
  331. SetHeader("Content-Type", "application/json").
  332. SetAuthToken(TOKEN).
  333. SetResult(&result).
  334. Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID)
  335. if err != nil {
  336. return &result, fmt.Errorf("resty DelJob: %v", err)
  337. }
  338. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  339. retry++
  340. _ = getToken()
  341. goto sendjob
  342. }
  343. var response models.NotebookResult
  344. err = json.Unmarshal(res.Body(), &response)
  345. if err != nil {
  346. log.Error("json.Unmarshal failed: %s", err.Error())
  347. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  348. }
  349. if len(response.ErrorCode) != 0 {
  350. log.Error("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  351. return &result, fmt.Errorf("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  352. }
  353. return &result, nil
  354. }
  355. func GetJobToken(jobID string) (*models.NotebookGetJobTokenResult, error) {
  356. checkSetting()
  357. client := getRestyClient()
  358. var result models.NotebookGetJobTokenResult
  359. retry := 0
  360. sendjob:
  361. res, err := client.R().
  362. SetHeader("Content-Type", "application/json").
  363. SetAuthToken(TOKEN).
  364. SetResult(&result).
  365. Get(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID + "/token")
  366. if err != nil {
  367. return &result, fmt.Errorf("resty GetJobToken: %v", err)
  368. }
  369. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  370. retry++
  371. _ = getToken()
  372. goto sendjob
  373. }
  374. var response models.NotebookResult
  375. err = json.Unmarshal(res.Body(), &response)
  376. if err != nil {
  377. log.Error("json.Unmarshal failed: %s", err.Error())
  378. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  379. }
  380. if len(response.ErrorCode) != 0 {
  381. log.Error("GetJobToken failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  382. return &result, fmt.Errorf("GetJobToken failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  383. }
  384. return &result, nil
  385. }
  386. func createTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) {
  387. checkSetting()
  388. client := getRestyClient()
  389. var result models.CreateTrainJobResult
  390. retry := 0
  391. sendjob:
  392. res, err := client.R().
  393. SetHeader("Content-Type", "application/json").
  394. SetAuthToken(TOKEN).
  395. SetBody(createJobParams).
  396. SetResult(&result).
  397. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
  398. if err != nil {
  399. return nil, fmt.Errorf("resty create train-job: %s", err)
  400. }
  401. req, _ := json.Marshal(createJobParams)
  402. log.Info("%s", req)
  403. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  404. retry++
  405. _ = getToken()
  406. goto sendjob
  407. }
  408. if res.StatusCode() != http.StatusOK {
  409. var temp models.ErrorResult
  410. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  411. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  412. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  413. }
  414. log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  415. BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
  416. DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
  417. if temp.ErrorMsg == BootFileErrorMsg {
  418. log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  419. return &result, fmt.Errorf("启动文件错误!")
  420. }
  421. if temp.ErrorMsg == DataSetErrorMsg {
  422. log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  423. return &result, fmt.Errorf("数据集错误!")
  424. }
  425. return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  426. }
  427. if !result.IsSuccess {
  428. log.Error("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  429. return &result, fmt.Errorf("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  430. }
  431. return &result, nil
  432. }
  433. func createTrainJobVersion(createJobVersionParams models.CreateTrainJobVersionParams, jobID string) (*models.CreateTrainJobResult, error) {
  434. checkSetting()
  435. client := getRestyClient()
  436. var result models.CreateTrainJobResult
  437. retry := 0
  438. sendjob:
  439. res, err := client.R().
  440. SetHeader("Content-Type", "application/json").
  441. SetAuthToken(TOKEN).
  442. SetBody(createJobVersionParams).
  443. SetResult(&result).
  444. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")
  445. if err != nil {
  446. return nil, fmt.Errorf("resty create train-job version: %s", err)
  447. }
  448. req, _ := json.Marshal(createJobVersionParams)
  449. log.Info("%s", req)
  450. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  451. retry++
  452. _ = getToken()
  453. goto sendjob
  454. }
  455. if res.StatusCode() != http.StatusOK {
  456. var temp models.ErrorResult
  457. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  458. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  459. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  460. }
  461. BootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
  462. DataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
  463. if temp.ErrorMsg == BootFileErrorMsg {
  464. log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  465. return &result, fmt.Errorf("启动文件错误!")
  466. }
  467. if temp.ErrorMsg == DataSetErrorMsg {
  468. log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  469. return &result, fmt.Errorf("数据集错误!")
  470. }
  471. return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  472. }
  473. if !result.IsSuccess {
  474. log.Error("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  475. return &result, fmt.Errorf("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  476. }
  477. return &result, nil
  478. }
  479. func GetResourceSpecs() (*models.GetResourceSpecsResult, error) {
  480. checkSetting()
  481. client := getRestyClient()
  482. var result models.GetResourceSpecsResult
  483. retry := 0
  484. sendjob:
  485. res, err := client.R().
  486. SetHeader("Content-Type", "application/json").
  487. SetAuthToken(TOKEN).
  488. SetResult(&result).
  489. Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs)
  490. if err != nil {
  491. return nil, fmt.Errorf("resty GetResourceSpecs: %v", err)
  492. }
  493. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  494. retry++
  495. _ = getToken()
  496. goto sendjob
  497. }
  498. if res.StatusCode() != http.StatusOK {
  499. var temp models.ErrorResult
  500. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  501. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  502. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  503. }
  504. log.Error("GetResourceSpecs failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  505. return &result, fmt.Errorf("GetResourceSpecs failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  506. }
  507. if !result.IsSuccess {
  508. log.Error("GetResourceSpecs failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  509. return &result, fmt.Errorf("GetResourceSpecs failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  510. }
  511. return &result, nil
  512. }
  513. func CreateTrainJobConfig(req models.CreateConfigParams) (*models.CreateTrainJobConfigResult, error) {
  514. checkSetting()
  515. client := getRestyClient()
  516. var result models.CreateTrainJobConfigResult
  517. retry := 0
  518. sendjob:
  519. res, err := client.R().
  520. SetHeader("Content-Type", "application/json").
  521. SetAuthToken(TOKEN).
  522. SetBody(req).
  523. SetResult(&result).
  524. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig)
  525. if err != nil {
  526. return nil, fmt.Errorf("resty CreateTrainJobConfig: %s", err)
  527. }
  528. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  529. retry++
  530. _ = getToken()
  531. goto sendjob
  532. }
  533. //temp, _ := json.Marshal(req)
  534. //log.Info("%s", temp)
  535. if res.StatusCode() != http.StatusOK {
  536. var temp models.ErrorResult
  537. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  538. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  539. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  540. }
  541. log.Error("CreateTrainJobConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  542. return &result, fmt.Errorf("CreateTrainJobConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  543. }
  544. if !result.IsSuccess {
  545. log.Error("CreateTrainJobConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  546. return &result, fmt.Errorf("CreateTrainJobConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  547. }
  548. return &result, nil
  549. }
  550. func GetConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
  551. checkSetting()
  552. client := getRestyClient()
  553. var result models.GetConfigListResult
  554. retry := 0
  555. sendjob:
  556. res, err := client.R().
  557. SetQueryParams(map[string]string{
  558. "per_page": strconv.Itoa(perPage),
  559. "page": strconv.Itoa(page),
  560. "sortBy": sortBy,
  561. "order": order,
  562. "search_content": searchContent,
  563. "config_type": configType,
  564. }).
  565. SetAuthToken(TOKEN).
  566. SetResult(&result).
  567. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig)
  568. if err != nil {
  569. return nil, fmt.Errorf("resty GetConfigList: %v", err)
  570. }
  571. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  572. retry++
  573. _ = getToken()
  574. goto sendjob
  575. }
  576. if res.StatusCode() != http.StatusOK {
  577. var temp models.ErrorResult
  578. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  579. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  580. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  581. }
  582. log.Error("GetConfigList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  583. return &result, fmt.Errorf("获取参数配置列表失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  584. }
  585. if !result.IsSuccess {
  586. log.Error("GetConfigList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  587. return &result, fmt.Errorf("获取参数配置列表失败(%s): %s", result.ErrorCode, result.ErrorMsg)
  588. }
  589. return &result, nil
  590. }
  591. func GetParaConfig(configName, configType string) (models.GetConfigResult, error) {
  592. checkSetting()
  593. client := getRestyClient()
  594. var result models.GetConfigResult
  595. retry := 0
  596. sendjob:
  597. res, err := client.R().
  598. SetQueryParams(map[string]string{
  599. "config_type": configType,
  600. }).
  601. SetAuthToken(TOKEN).
  602. SetResult(&result).
  603. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig + "/" + configName)
  604. if err != nil {
  605. return result, fmt.Errorf("resty GetParaConfig: %v", err)
  606. }
  607. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  608. retry++
  609. _ = getToken()
  610. goto sendjob
  611. }
  612. if res.StatusCode() != http.StatusOK {
  613. var temp models.ErrorResult
  614. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  615. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  616. return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  617. }
  618. log.Error("GetParaConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  619. return result, fmt.Errorf("获取参数配置详情失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  620. }
  621. if !result.IsSuccess {
  622. log.Error("GetParaConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  623. return result, fmt.Errorf("获取参数配置详情失败(%s): %s", result.ErrorCode, result.ErrorMsg)
  624. }
  625. return result, nil
  626. }
  627. func GetTrainJob(jobID, versionID string) (*models.GetTrainJobResult, error) {
  628. checkSetting()
  629. client := getRestyClient()
  630. var result models.GetTrainJobResult
  631. retry := 0
  632. sendjob:
  633. res, err := client.R().
  634. SetAuthToken(TOKEN).
  635. SetResult(&result).
  636. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID)
  637. if err != nil {
  638. return nil, fmt.Errorf("resty GetTrainJob: %v", err)
  639. }
  640. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  641. retry++
  642. _ = getToken()
  643. goto sendjob
  644. }
  645. if res.StatusCode() != http.StatusOK {
  646. var temp models.ErrorResult
  647. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  648. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  649. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  650. }
  651. log.Error("GetTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  652. return &result, fmt.Errorf("获取作业详情失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  653. }
  654. if !result.IsSuccess {
  655. log.Error("GetTrainJob(%s) failed", jobID)
  656. return &result, fmt.Errorf("获取作业详情失败")
  657. }
  658. return &result, nil
  659. }
  660. func GetTrainJobLog(jobID, versionID, baseLine, logFile, order string, lines int) (*models.GetTrainJobLogResult, error) {
  661. checkSetting()
  662. client := getRestyClient()
  663. var result models.GetTrainJobLogResult
  664. retry := 0
  665. sendjob:
  666. res, err := client.R().
  667. SetQueryParams(map[string]string{
  668. "base_line": baseLine,
  669. "lines": strconv.Itoa(lines),
  670. "log_file": logFile,
  671. "order": order,
  672. }).
  673. SetAuthToken(TOKEN).
  674. SetResult(&result).
  675. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/aom-log")
  676. if err != nil {
  677. return nil, fmt.Errorf("resty GetTrainJobLog: %v", err)
  678. }
  679. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  680. retry++
  681. _ = getToken()
  682. goto sendjob
  683. }
  684. if res.StatusCode() != http.StatusOK {
  685. var temp models.ErrorResult
  686. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  687. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  688. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  689. }
  690. log.Error("GetTrainJobLog failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  691. return &result, fmt.Errorf("获取作业日志失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  692. }
  693. if !result.IsSuccess {
  694. log.Error("GetTrainJobLog(%s) failed", jobID)
  695. return &result, fmt.Errorf("获取作业日志失败:%s", result.ErrorMsg)
  696. }
  697. return &result, nil
  698. }
  699. func GetTrainJobLogFileNames(jobID, versionID string) (*models.GetTrainJobLogFileNamesResult, error) {
  700. checkSetting()
  701. client := getRestyClient()
  702. var result models.GetTrainJobLogFileNamesResult
  703. retry := 0
  704. sendjob:
  705. res, err := client.R().
  706. SetAuthToken(TOKEN).
  707. SetResult(&result).
  708. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/log/file-names")
  709. if err != nil {
  710. return nil, fmt.Errorf("resty GetTrainJobLogFileNames: %v", err)
  711. }
  712. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  713. retry++
  714. _ = getToken()
  715. goto sendjob
  716. }
  717. if res.StatusCode() != http.StatusOK {
  718. var temp models.ErrorResult
  719. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  720. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  721. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  722. }
  723. log.Error("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  724. return &result, fmt.Errorf("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  725. }
  726. if !result.IsSuccess {
  727. log.Error("GetTrainJobLogFileNames(%s) failed", jobID)
  728. return &result, fmt.Errorf("获取作业日志文件失败:%s", result.ErrorMsg)
  729. }
  730. return &result, nil
  731. }
  732. func DelTrainJob(jobID string) (*models.TrainJobResult, error) {
  733. checkSetting()
  734. client := getRestyClient()
  735. var result models.TrainJobResult
  736. retry := 0
  737. sendjob:
  738. res, err := client.R().
  739. SetAuthToken(TOKEN).
  740. SetResult(&result).
  741. Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID)
  742. if err != nil {
  743. return &result, fmt.Errorf("resty DelTrainJob: %v", err)
  744. }
  745. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  746. retry++
  747. _ = getToken()
  748. goto sendjob
  749. }
  750. if res.StatusCode() != http.StatusOK {
  751. var temp models.ErrorResult
  752. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  753. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  754. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  755. }
  756. log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  757. return &result, fmt.Errorf("删除训练作业失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  758. }
  759. if !result.IsSuccess {
  760. log.Error("DelTrainJob(%s) failed", jobID)
  761. return &result, fmt.Errorf("删除训练作业失败:%s", result.ErrorMsg)
  762. }
  763. return &result, nil
  764. }
  765. func StopTrainJob(jobID, versionID string) (*models.TrainJobResult, error) {
  766. checkSetting()
  767. client := getRestyClient()
  768. var result models.TrainJobResult
  769. retry := 0
  770. sendjob:
  771. res, err := client.R().
  772. SetAuthToken(TOKEN).
  773. SetResult(&result).
  774. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/stop")
  775. if err != nil {
  776. return &result, fmt.Errorf("resty StopTrainJob: %v", err)
  777. }
  778. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  779. retry++
  780. _ = getToken()
  781. goto sendjob
  782. }
  783. if res.StatusCode() != http.StatusOK {
  784. var temp models.ErrorResult
  785. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  786. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  787. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  788. }
  789. log.Error("StopTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  790. return &result, fmt.Errorf("停止训练作业失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  791. }
  792. if !result.IsSuccess {
  793. log.Error("StopTrainJob(%s) failed", jobID)
  794. return &result, fmt.Errorf("停止训练作业失败:%s", result.ErrorMsg)
  795. }
  796. return &result, nil
  797. }
  798. func DelTrainJobVersion(jobID string, versionID string) (*models.TrainJobResult, error) {
  799. checkSetting()
  800. client := getRestyClient()
  801. var result models.TrainJobResult
  802. retry := 0
  803. sendjob:
  804. res, err := client.R().
  805. SetAuthToken(TOKEN).
  806. SetResult(&result).
  807. Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID)
  808. if err != nil {
  809. return &result, fmt.Errorf("resty DelTrainJobVersion: %v", err)
  810. }
  811. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  812. retry++
  813. _ = getToken()
  814. goto sendjob
  815. }
  816. if res.StatusCode() != http.StatusOK {
  817. var temp models.ErrorResult
  818. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  819. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  820. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  821. }
  822. log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  823. return &result, fmt.Errorf("删除训练作业版本失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  824. }
  825. if !result.IsSuccess {
  826. log.Error("DelTrainJob(%s) failed", jobID)
  827. return &result, fmt.Errorf("删除训练作业版本失败:%s", result.ErrorMsg)
  828. }
  829. return &result, nil
  830. }
  831. func createInferenceJob(createJobParams models.CreateInferenceJobParams) (*models.CreateTrainJobResult, error) {
  832. checkSetting()
  833. client := getRestyClient()
  834. var result models.CreateTrainJobResult
  835. retry := 0
  836. sendjob:
  837. res, err := client.R().
  838. SetHeader("Content-Type", "application/json").
  839. SetAuthToken(TOKEN).
  840. SetBody(createJobParams).
  841. SetResult(&result).
  842. Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
  843. if err != nil {
  844. return nil, fmt.Errorf("resty create inference-job: %s", err)
  845. }
  846. req, _ := json.Marshal(createJobParams)
  847. log.Info("%s", req)
  848. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  849. retry++
  850. _ = getToken()
  851. goto sendjob
  852. }
  853. if res.StatusCode() != http.StatusOK {
  854. var temp models.ErrorResult
  855. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  856. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  857. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  858. }
  859. log.Error("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  860. BootFileErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.BootFileUrl + "'."
  861. DataSetErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.DataUrl + "'."
  862. if temp.ErrorMsg == BootFileErrorMsg {
  863. log.Error("启动文件错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  864. return &result, fmt.Errorf("启动文件错误!")
  865. }
  866. if temp.ErrorMsg == DataSetErrorMsg {
  867. log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  868. return &result, fmt.Errorf("数据集错误!")
  869. }
  870. return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  871. }
  872. if !result.IsSuccess {
  873. log.Error("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  874. return &result, fmt.Errorf("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg)
  875. }
  876. return &result, nil
  877. }
  878. func createNotebook2(createJobParams models.CreateNotebook2Params) (*models.CreateNotebookResult, error) {
  879. checkSetting()
  880. client := getRestyClient()
  881. var result models.CreateNotebookResult
  882. retry := 0
  883. sendjob:
  884. res, err := client.R().
  885. SetHeader("Content-Type", "application/json").
  886. SetAuthToken(TOKEN).
  887. SetBody(createJobParams).
  888. SetResult(&result).
  889. Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2)
  890. if err != nil {
  891. return nil, fmt.Errorf("resty create notebook2: %s", err)
  892. }
  893. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  894. retry++
  895. _ = getToken()
  896. goto sendjob
  897. }
  898. var response models.NotebookResult
  899. err = json.Unmarshal(res.Body(), &response)
  900. if err != nil {
  901. log.Error("json.Unmarshal failed: %s", err.Error())
  902. return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
  903. }
  904. if len(response.ErrorCode) != 0 {
  905. log.Error("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  906. if response.ErrorCode == errorCodeExceedLimit {
  907. response.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
  908. }
  909. if response.ErrorCode == modelartsIllegalToken && retry < 1 {
  910. retry++
  911. _ = getToken()
  912. goto sendjob
  913. }
  914. return &result, fmt.Errorf("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
  915. }
  916. return &result, nil
  917. }
  918. func GetTrainJobMetricStatistic(jobID, versionID, podName string) (*models.GetTrainJobMetricStatisticResult, error) {
  919. checkSetting()
  920. client := getRestyClient()
  921. var result models.GetTrainJobMetricStatisticResult
  922. retry := 0
  923. sendjob:
  924. res, err := client.R().
  925. SetAuthToken(TOKEN).
  926. SetResult(&result).
  927. Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/pod/" + podName + "/metric-statistic")
  928. if err != nil {
  929. return nil, fmt.Errorf("resty GetTrainJobMetricStatistic: %v", err)
  930. }
  931. if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
  932. retry++
  933. _ = getToken()
  934. goto sendjob
  935. }
  936. if res.StatusCode() != http.StatusOK {
  937. var temp models.ErrorResult
  938. if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
  939. log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  940. return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
  941. }
  942. log.Error("GetTrainJobMetricStatistic failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  943. return &result, fmt.Errorf("GetTrainJobMetricStatistic failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
  944. }
  945. if !result.IsSuccess {
  946. log.Error("GetTrainJobMetricStatistic(%s) failed", jobID)
  947. return &result, fmt.Errorf("获取任务资源占用情况失败:%s", result.ErrorMsg)
  948. }
  949. return &result, nil
  950. }