Browse Source

智算任务监控接口变动

Signed-off-by: zouap <zouap@pcl.ac.cn>
zouap
zouap 2 years ago
parent
commit
c95c0dd748
2 changed files with 12 additions and 8 deletions
  1. +9
    -0
      models/cloudbrain.go
  2. +3
    -8
      modules/grampus/resty.go

+ 9
- 0
models/cloudbrain.go View File

@@ -1661,6 +1661,15 @@ type Metrics struct {
Value []string `json:"value"` //获取的监控值的序列,元素为String类型 Value []string `json:"value"` //获取的监控值的序列,元素为String类型
} }


// NewModelArtsMetricStatisticResult is the decoded JSON payload of a metrics
// query; its only field is read from the top-level "metrics" key.
//
// NOTE(review): the element type here is Metrics (whose Value is []string),
// not the NewModelArtsMetrics type (whose Value is []float32). If the service
// returns numeric values, decoding into Metrics will fail — confirm which
// element type is intended.
type NewModelArtsMetricStatisticResult struct {
MetricsInfo []Metrics `json:"metrics"` // monitoring details, one entry per metric
}

// NewModelArtsMetrics describes a single monitored metric together with its
// series of sampled values, where the values are decoded as floats.
type NewModelArtsMetrics struct {
Metric string `json:"metric"` // name of the monitored metric item
Value []float32 `json:"value"` // sequence of collected monitoring values; elements are floats
}

func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
sess := x.NewSession() sess := x.NewSession()
defer sess.Close() defer sess.Close()


+ 3
- 8
modules/grampus/resty.go View File

@@ -315,10 +315,10 @@ func GetTrainJobLog(jobID string) (string, error) {
return logContent, nil return logContent, nil
} }


func GetGrampusMetrics(jobID string) (models.GetTrainJobMetricStatisticResult, error) {
func GetGrampusMetrics(jobID string) (models.NewModelArtsMetricStatisticResult, error) {
checkSetting() checkSetting()
client := getRestyClient() client := getRestyClient()
var result models.GetTrainJobMetricStatisticResult
var result models.NewModelArtsMetricStatisticResult
res, err := client.R(). res, err := client.R().
SetAuthToken(TOKEN). SetAuthToken(TOKEN).
Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics") Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics")
@@ -331,12 +331,7 @@ func GetGrampusMetrics(jobID string) (models.GetTrainJobMetricStatisticResult, e
return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
} }
if res.StatusCode() != http.StatusOK { if res.StatusCode() != http.StatusOK {
log.Error("Call GrampusMetrics failed(%d):%s(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg)
return result, fmt.Errorf("Call GrampusMetrics failed(%d):%d(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg)
}
if !result.IsSuccess {
log.Error("GetGrampusMetrics(%s) failed", jobID)
return result, fmt.Errorf("GetGrampusMetrics failed:%s", result.ErrorMsg)
return result, fmt.Errorf("Call GrampusMetrics failed(%d)", res.StatusCode())
} }
return result, nil return result, nil
} }


Loading…
Cancel
Save