Browse Source

Merge remote-tracking branch 'origin/V20220908' into res-manage-v2

# Conflicts:
#	routers/repo/cloudbrain.go
#	routers/repo/modelarts.go
res-manage-v2
chenyifan01 2 years ago
parent
commit
c5e0971655
17 changed files with 1192 additions and 165 deletions
  1. +23
    -36
      models/cloudbrain.go
  2. +3
    -3
      modules/cloudbrain/cloudbrain.go
  3. +2
    -2
      modules/grampus/grampus.go
  4. +26
    -10
      modules/modelarts/modelarts.go
  5. +214
    -0
      modules/modelarts_cd/modelarts.go
  6. +220
    -0
      modules/modelarts_cd/resty.go
  7. +42
    -0
      modules/modelarts_gateway/core/escape.go
  8. +208
    -0
      modules/modelarts_gateway/core/signer.go
  9. +75
    -7
      modules/setting/setting.go
  10. +2
    -0
      options/locale/locale_en-US.ini
  11. +2
    -0
      options/locale/locale_zh-CN.ini
  12. +139
    -30
      routers/api/v1/repo/cloudbrain.go
  13. +11
    -6
      routers/repo/cloudbrain.go
  14. +127
    -51
      routers/repo/modelarts.go
  15. +39
    -15
      templates/repo/cloudbrain/benchmark/show.tmpl
  16. +42
    -4
      templates/repo/cloudbrain/inference/show.tmpl
  17. +17
    -1
      templates/repo/modelarts/trainjob/new.tmpl

+ 23
- 36
models/cloudbrain.go View File

@@ -25,7 +25,8 @@ type ModelArtsJobStatus string
const (
TypeCloudBrainOne int = iota
TypeCloudBrainTwo
TypeC2Net //智算网络
TypeC2Net //智算网络
TypeCDCenter //成都智算中心

TypeCloudBrainAll = -1
)
@@ -601,37 +602,17 @@ type ResourceSpec struct {
ShareMemMiB int `json:"shareMemMiB"`
}

type FlavorInfos struct {
FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

type FlavorInfo struct {
Id int `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

type SpecialPools struct {
Pools []*SpecialPool `json:"pools"`
}
type SpecialPool struct {
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
Flavor []*FlavorInfo `json:"flavor"`
}

type ImageInfosModelArts struct {
ImageInfo []*ImageInfoModelArts `json:"image_info"`
}

type ImageInfoModelArts struct {
Id string `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
Flavor []*setting.FlavorInfo `json:"flavor"`
}

type PoolInfos struct {
@@ -737,6 +718,17 @@ type CreateNotebook2Params struct {
Volume VolumeReq `json:"volume"`
}

type CreateNotebookWithoutPoolParams struct {
JobName string `json:"name"`
Description string `json:"description"`
Duration int64 `json:"duration"` //ms
Feature string `json:"feature"`
Flavor string `json:"flavor"`
ImageID string `json:"image_id"`
WorkspaceID string `json:"workspace_id"`
Volume VolumeReq `json:"volume"`
}

type VolumeReq struct {
Capacity int `json:"capacity"`
Category string `json:"category"`
@@ -960,6 +952,7 @@ type NotebookGetJobTokenResult struct {
}

type NotebookDelResult struct {
NotebookResult
InstanceID string `json:"instance_id"`
}

@@ -1486,12 +1479,6 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
)
}

if len(opts.ComputeResource) > 0 {
cond = cond.And(
builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource},
)
}

if len(opts.JobTypes) > 0 {
if opts.JobTypeNot {
cond = cond.And(
@@ -1511,7 +1498,7 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
if (opts.Cluster) != "" {
if opts.Cluster == "resource_cluster_openi" {
cond = cond.And(
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}),
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}, builder.Eq{"cloudbrain.type": TypeCDCenter}),
)
}
if opts.Cluster == "resource_cluster_c2net" {
@@ -1977,7 +1964,7 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy

func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) {
count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting).
And("job_type = ? and user_id = ? and type = ?", JobTypeDebug, userID, TypeCloudBrainTwo).Count(new(Cloudbrain))
And("job_type = ? and user_id = ? and type in (?,?)", JobTypeDebug, userID, TypeCloudBrainTwo, TypeCDCenter).Count(new(Cloudbrain))
return int(count), err
}



+ 3
- 3
modules/cloudbrain/cloudbrain.go View File

@@ -20,7 +20,7 @@ import (
const (
//Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh | tee /model/benchmark-log.txt;echo "end benchmark"`
CodeMountPath = "/code"
DataSetMountPath = "/dataset"
ModelMountPath = "/model"
@@ -30,8 +30,8 @@ const (
Snn4imagenetMountPath = "/snn4imagenet"
BrainScoreMountPath = "/brainscore"
TaskInfoName = "/taskInfo"
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s'`
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s'`
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s' | tee /model/benchmark-log.txt`
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s' | tee /model/benchmark-log.txt`

SubTaskName = "task1"



+ 2
- 2
modules/grampus/grampus.go View File

@@ -30,8 +30,8 @@ const (

var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts
FlavorInfos *setting.StFlavorInfos
ImageInfos *setting.StImageInfosModelArts

SpecialPools *models.SpecialPools
)


+ 26
- 10
modules/modelarts/modelarts.go View File

@@ -1,6 +1,7 @@
package modelarts

import (
"code.gitea.io/gitea/modules/modelarts_cd"
"encoding/json"
"errors"
"fmt"
@@ -68,10 +69,9 @@ const (

var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts
TrainFlavorInfos *Flavor
SpecialPools *models.SpecialPools
SpecialPools *models.SpecialPools
MultiNodeConfig *MultiNodes
)

type GenerateTrainJobReq struct {
@@ -167,6 +167,14 @@ type ResourcePool struct {
} `json:"resource_pool"`
}

type MultiNodes struct{
Info []OrgMultiNode `json:"multinode"`
}
type OrgMultiNode struct{
Org string `json:"org"`
Node []int `json:"node"`
}

// type Parameter struct {
// Label string `json:"label"`
// Value string `json:"value"`
@@ -753,11 +761,7 @@ func GetNotebookImageName(imageId string) (string, error) {
var validImage = false
var imageName = ""

if ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos)
}

for _, imageInfo := range ImageInfos.ImageInfo {
for _, imageInfo := range setting.StImageInfos.ImageInfo {
if imageInfo.Id == imageId {
validImage = true
imageName = imageInfo.Value
@@ -778,6 +782,13 @@ func InitSpecialPool() {
}
}

func InitMultiNode(){
if MultiNodeConfig ==nil && setting.ModelArtsMultiNode!=""{
json.Unmarshal([]byte(setting.ModelArtsMultiNode), &MultiNodeConfig)
}

}

func HandleTrainJobInfo(task *models.Cloudbrain) error {

result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
@@ -814,8 +825,13 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error {
}

func HandleNotebookInfo(task *models.Cloudbrain) error {

result, err := GetNotebook2(task.JobID)
var result *models.GetNotebook2Result
var err error
if task.Type == models.TypeCloudBrainTwo {
result, err = GetNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
result, err = modelarts_cd.GetNotebook(task.JobID)
}
if err != nil {
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
return err


+ 214
- 0
modules/modelarts_cd/modelarts.go View File

@@ -0,0 +1,214 @@
package modelarts_cd

import (
"errors"
"strconv"
"strings"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
)

const (
//notebook
storageTypeOBS = "obs"
autoStopDuration = 4 * 60 * 60
autoStopDurationMs = 4 * 60 * 60 * 1000
MORDELART_USER_IMAGE_ENGINE_ID = -1
DataSetMountPath = "/home/ma-user/work"
NotebookEnv = "Python3"
NotebookType = "Ascend"
FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

//train-job
CodePath = "/code/"
OutputPath = "/output/"
ResultPath = "/result/"
LogPath = "/log/"
JobPath = "/job/"
OrderDesc = "desc" //向下查询
OrderAsc = "asc" //向上查询
Lines = 500
TrainUrl = "train_url"
DataUrl = "data_url"
MultiDataUrl = "multi_data_url"
ResultUrl = "result_url"
CkptUrl = "ckpt_url"
DeviceTarget = "device_target"
Ascend = "Ascend"
PerPage = 10
IsLatestVersion = "1"
NotLatestVersion = "0"
VersionCountOne = 1

SortByCreateTime = "create_time"
ConfigTypeCustom = "custom"
TotalVersionCount = 1
)

var ()

type VersionInfo struct {
Version []struct {
ID int `json:"id"`
Value string `json:"value"`
Url string `json:"url"`
} `json:"version"`
}

type Flavor struct {
Info []struct {
Code string `json:"code"`
Value string `json:"value"`
} `json:"flavor"`
}

type Engine struct {
Info []struct {
ID int `json:"id"`
Value string `json:"value"`
} `json:"engine"`
}

type ResourcePool struct {
Info []struct {
ID string `json:"id"`
Value string `json:"value"`
} `json:"resource_pool"`
}

type Parameters struct {
Parameter []struct {
Label string `json:"label"`
Value string `json:"value"`
} `json:"parameter"`
}

func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error {
imageName, err := GetNotebookImageName(imageId)
if err != nil {
log.Error("GetNotebookImageName failed: %v", err.Error())
return err
}
createTime := timeutil.TimeStampNow()
jobResult, err := createNotebook(models.CreateNotebookWithoutPoolParams{
JobName: jobName,
Description: description,
Flavor: flavor,
Duration: autoStopDurationMs,
ImageID: imageId,
Feature: models.NotebookFeature,
Volume: models.VolumeReq{
Capacity: setting.Capacity,
Category: models.EVSCategory,
Ownership: models.ManagedOwnership,
},
WorkspaceID: "0",
})
if err != nil {
log.Error("createNotebook failed: %v", err.Error())
if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
log.Info("(%s)unknown error, set temp status", displayJobName)
errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
JobID: models.TempJobId,
VersionID: models.TempVersionId,
Status: models.TempJobStatus,
Type: models.TypeCDCenter,
JobName: jobName,
JobType: string(models.JobTypeDebug),
})
if errTemp != nil {
log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
return errTemp
}
}
return err
}
task := &models.Cloudbrain{
Status: jobResult.Status,
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
JobID: jobResult.ID,
JobName: jobName,
FlavorCode: flavor,
DisplayJobName: displayJobName,
JobType: string(models.JobTypeDebug),
Type: models.TypeCDCenter,
Uuid: uuid,
ComputeResource: models.NPUResource,
Image: imageName,
Description: description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
}

err = models.CreateCloudbrain(task)
if err != nil {
return err
}

stringId := strconv.FormatInt(task.ID, 10)
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask)
return nil
}

func GetNotebookImageName(imageId string) (string, error) {
var validImage = false
var imageName = ""

for _, imageInfo := range setting.StImageInfos.ImageInfo {
if imageInfo.Id == imageId {
validImage = true
imageName = imageInfo.Value
}
}

if !validImage {
log.Error("the image id(%s) is invalid", imageId)
return imageName, errors.New("the image id is invalid")
}

return imageName, nil
}

/*
func HandleNotebookInfo(task *models.Cloudbrain) error {

result, err := GetNotebook(task.JobID)
if err != nil {
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
return err
}

if result != nil {
oldStatus := task.Status
task.Status = result.Status
if task.StartTime == 0 && result.Lease.UpdateTime > 0 {
task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
}
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.CorrectCreateUnix()
task.ComputeAndSetDuration()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
if task.FlavorCode == "" {
task.FlavorCode = result.Flavor
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err)
return err
}
}

return nil
}

*/

+ 220
- 0
modules/modelarts_cd/resty.go View File

@@ -0,0 +1,220 @@
package modelarts_cd

import (
"bytes"
"code.gitea.io/gitea/modules/modelarts_gateway/core"
"crypto/tls"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"strconv"
"time"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)

var (
httpClient *http.Client
HOST string
TOKEN string
)

const (
errorCodeExceedLimit = "ModelArts.0118"

//notebook 2.0
urlNotebook2 = "/notebooks"

//error code
modelartsIllegalToken = "ModelArts.6401"
NotebookNotFound = "ModelArts.6404"
NotebookNoPermission = "ModelArts.6407"
NotebookInvalid = "ModelArts.6400"
UnknownErrorPrefix = "UNKNOWN:"
)

func getHttpClient() *http.Client {
if httpClient == nil {
httpClient = &http.Client{
Timeout: 30 * time.Second,
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}},
}
}
return httpClient
}

func GetNotebook(jobID string) (*models.GetNotebook2Result, error) {
var result models.GetNotebook2Result

client := getHttpClient()
s := core.Signer{
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}
r, _ := http.NewRequest(http.MethodGet,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID,
nil)

r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := client.Do(r)
if err != nil {
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}

if len(result.ErrorCode) != 0 {
log.Error("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil
}

func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
var result models.NotebookActionResult

client := getHttpClient()
s := core.Signer{
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}
r, _ := http.NewRequest(http.MethodPost,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID+"/"+param.Action+"?duration="+strconv.Itoa(autoStopDurationMs),
nil)

r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := client.Do(r)
if err != nil {
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}

if len(result.ErrorCode) != 0 {
log.Error("ManageNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("ManageNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil
}

func DelNotebook(jobID string) (*models.NotebookDelResult, error) {
var result models.NotebookDelResult

client := getHttpClient()
s := core.Signer{
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}

r, _ := http.NewRequest(http.MethodDelete,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID,
nil)

r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := client.Do(r)
if err != nil {
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}

if len(result.ErrorCode) != 0 {
log.Error("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil
}

func createNotebook(createJobParams models.CreateNotebookWithoutPoolParams) (*models.CreateNotebookResult, error) {
var result models.CreateNotebookResult
client := getHttpClient()
s := core.Signer{
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}

req, _ := json.Marshal(createJobParams)
r, _ := http.NewRequest(http.MethodPost,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2,
ioutil.NopCloser(bytes.NewBuffer(req)))

r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := client.Do(r)
if err != nil {
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
}

if len(result.ErrorCode) != 0 {
log.Error("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
if result.ErrorCode == errorCodeExceedLimit {
result.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
}
return &result, fmt.Errorf("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil
}

+ 42
- 0
modules/modelarts_gateway/core/escape.go View File

@@ -0,0 +1,42 @@
// based on https://github.com/golang/go/blob/master/src/net/url/url.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package core

func shouldEscape(c byte) bool {
if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c == '-' || c == '~' || c == '.' {
return false
}
return true
}
func escape(s string) string {
hexCount := 0
for i := 0; i < len(s); i++ {
c := s[i]
if shouldEscape(c) {
hexCount++
}
}

if hexCount == 0 {
return s
}

t := make([]byte, len(s)+2*hexCount)
j := 0
for i := 0; i < len(s); i++ {
switch c := s[i]; {
case shouldEscape(c):
t[j] = '%'
t[j+1] = "0123456789ABCDEF"[c>>4]
t[j+2] = "0123456789ABCDEF"[c&15]
j += 3
default:
t[j] = s[i]
j++
}
}
return string(t)
}

+ 208
- 0
modules/modelarts_gateway/core/signer.go View File

@@ -0,0 +1,208 @@
// HWS API Gateway Signature
// based on https://github.com/datastream/aws/blob/master/signv4.go
// Copyright (c) 2014, Xianjie

package core

import (
"bytes"
"crypto/hmac"
"crypto/sha256"
"fmt"
"io/ioutil"
"net/http"
"sort"
"strings"
"time"
)

const (
BasicDateFormat = "20060102T150405Z"
Algorithm = "SDK-HMAC-SHA256"
HeaderXDate = "X-Sdk-Date"
HeaderHost = "host"
HeaderAuthorization = "Authorization"
HeaderContentSha256 = "X-Sdk-Content-Sha256"
)

func hmacsha256(key []byte, data string) ([]byte, error) {
h := hmac.New(sha256.New, []byte(key))
if _, err := h.Write([]byte(data)); err != nil {
return nil, err
}
return h.Sum(nil), nil
}

// Build a CanonicalRequest from a regular request string
//
// CanonicalRequest =
// HTTPRequestMethod + '\n' +
// CanonicalURI + '\n' +
// CanonicalQueryString + '\n' +
// CanonicalHeaders + '\n' +
// SignedHeaders + '\n' +
// HexEncode(Hash(RequestPayload))
func CanonicalRequest(r *http.Request, signedHeaders []string) (string, error) {
var hexencode string
var err error
if hex := r.Header.Get(HeaderContentSha256); hex != "" {
hexencode = hex
} else {
data, err := RequestPayload(r)
if err != nil {
return "", err
}
hexencode, err = HexEncodeSHA256Hash(data)
if err != nil {
return "", err
}
}
return fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, CanonicalURI(r), CanonicalQueryString(r), CanonicalHeaders(r, signedHeaders), strings.Join(signedHeaders, ";"), hexencode), err
}

// CanonicalURI returns request uri
func CanonicalURI(r *http.Request) string {
pattens := strings.Split(r.URL.Path, "/")
var uri []string
for _, v := range pattens {
uri = append(uri, escape(v))
}
urlpath := strings.Join(uri, "/")
if len(urlpath) == 0 || urlpath[len(urlpath)-1] != '/' {
urlpath = urlpath + "/"
}
return urlpath
}

// CanonicalQueryString
func CanonicalQueryString(r *http.Request) string {
var keys []string
query := r.URL.Query()
for key := range query {
keys = append(keys, key)
}
sort.Strings(keys)
var a []string
for _, key := range keys {
k := escape(key)
sort.Strings(query[key])
for _, v := range query[key] {
kv := fmt.Sprintf("%s=%s", k, escape(v))
a = append(a, kv)
}
}
queryStr := strings.Join(a, "&")
r.URL.RawQuery = queryStr
return queryStr
}

// CanonicalHeaders
func CanonicalHeaders(r *http.Request, signerHeaders []string) string {
var a []string
header := make(map[string][]string)
for k, v := range r.Header {
header[strings.ToLower(k)] = v
}
for _, key := range signerHeaders {
value := header[key]
if strings.EqualFold(key, HeaderHost) {
value = []string{r.Host}
}
sort.Strings(value)
for _, v := range value {
a = append(a, key+":"+strings.TrimSpace(v))
}
}
return fmt.Sprintf("%s\n", strings.Join(a, "\n"))
}

// SignedHeaders
func SignedHeaders(r *http.Request) []string {
var a []string
for key := range r.Header {
a = append(a, strings.ToLower(key))
}
sort.Strings(a)
return a
}

// RequestPayload
func RequestPayload(r *http.Request) ([]byte, error) {
if r.Body == nil {
return []byte(""), nil
}
b, err := ioutil.ReadAll(r.Body)
if err != nil {
return []byte(""), err
}
r.Body = ioutil.NopCloser(bytes.NewBuffer(b))
return b, err
}

// Create a "String to Sign".
func StringToSign(canonicalRequest string, t time.Time) (string, error) {
hash := sha256.New()
_, err := hash.Write([]byte(canonicalRequest))
if err != nil {
return "", err
}
return fmt.Sprintf("%s\n%s\n%x",
Algorithm, t.UTC().Format(BasicDateFormat), hash.Sum(nil)), nil
}

// Create the HWS Signature.
func SignStringToSign(stringToSign string, signingKey []byte) (string, error) {
hm, err := hmacsha256(signingKey, stringToSign)
return fmt.Sprintf("%x", hm), err
}

// HexEncodeSHA256Hash returns hexcode of sha256
func HexEncodeSHA256Hash(body []byte) (string, error) {
hash := sha256.New()
if body == nil {
body = []byte("")
}
_, err := hash.Write(body)
return fmt.Sprintf("%x", hash.Sum(nil)), err
}

// Get the finalized value for the "Authorization" header. The signature parameter is the output from SignStringToSign
func AuthHeaderValue(signature, accessKey string, signedHeaders []string) string {
return fmt.Sprintf("%s Access=%s, SignedHeaders=%s, Signature=%s", Algorithm, accessKey, strings.Join(signedHeaders, ";"), signature)
}

// Signature HWS meta
type Signer struct {
Key string
Secret string
}

// SignRequest set Authorization header
func (s *Signer) Sign(r *http.Request) error {
var t time.Time
var err error
var dt string
if dt = r.Header.Get(HeaderXDate); dt != "" {
t, err = time.Parse(BasicDateFormat, dt)
}
if err != nil || dt == "" {
t = time.Now()
r.Header.Set(HeaderXDate, t.UTC().Format(BasicDateFormat))
}
signedHeaders := SignedHeaders(r)
canonicalRequest, err := CanonicalRequest(r, signedHeaders)
if err != nil {
return err
}
stringToSign, err := StringToSign(canonicalRequest, t)
if err != nil {
return err
}
signature, err := SignStringToSign(stringToSign, []byte(s.Secret))
if err != nil {
return err
}
authValue := AuthHeaderValue(signature, s.Key, signedHeaders)
r.Header.Set(HeaderAuthorization, authValue)
return nil
}

+ 75
- 7
modules/setting/setting.go View File

@@ -75,6 +75,26 @@ type C2NetSqInfos struct {
C2NetSqInfo []*C2NetSequenceInfo `json:"sequence"`
}

type StFlavorInfos struct {
FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

type FlavorInfo struct {
Id int `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

type StImageInfosModelArts struct {
ImageInfo []*ImageInfoModelArts `json:"image_info"`
}

type ImageInfoModelArts struct {
Id string `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

var (
// AppVer settings
AppVer string
@@ -535,18 +555,31 @@ var (
AllowedOrg string
ProfileID string
PoolInfos string
Flavor string
FlavorInfos string
DebugHost string
ImageInfos string
Capacity int
MaxTempQueryTimes int
StFlavorInfo *StFlavorInfos
StImageInfos *StImageInfosModelArts
//train-job
ResourcePools string
Engines string
EngineVersions string
FlavorInfos string
TrainJobFLAVORINFOS string
ModelArtsSpecialPools string
ModelArtsMultiNode string

// modelarts-cd config
ModelartsCD = struct {
Enabled bool
EndPoint string
ProjectID string
AccessKey string
SecretKey string
ImageInfos string
FlavorInfos string
}{}

//grampus config
Grampus = struct {
@@ -1422,9 +1455,8 @@ func NewContext() {
AllowedOrg = sec.Key("ORGANIZATION").MustString("")
ProfileID = sec.Key("PROFILE_ID").MustString("")
PoolInfos = sec.Key("POOL_INFOS").MustString("")
Flavor = sec.Key("FLAVOR").MustString("")
ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
Capacity = sec.Key("IMAGE_INFOS").MustInt(100)
Capacity = sec.Key("CAPACITY").MustInt(100)
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30)
ResourcePools = sec.Key("Resource_Pools").MustString("")
Engines = sec.Key("Engines").MustString("")
@@ -1432,6 +1464,7 @@ func NewContext() {
FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("")
TrainJobFLAVORINFOS = sec.Key("TrainJob_FLAVOR_INFOS").MustString("")
ModelArtsSpecialPools = sec.Key("SPECIAL_POOL").MustString("")
ModelArtsMultiNode=sec.Key("MULTI_NODE").MustString("")

sec = Cfg.Section("elk")
ElkUrl = sec.Key("ELKURL").MustString("")
@@ -1472,8 +1505,8 @@ func NewContext() {
Course.OrgName = sec.Key("org_name").MustString("")
Course.TeamName = sec.Key("team_name").MustString("")

GetGrampusConfig()
getGrampusConfig()
getModelartsCDConfig()
getModelConvertConfig()
}

@@ -1496,7 +1529,22 @@ func getModelConvertConfig() {
ModelConvert.NPU_TENSORFLOW_IMAGE_ID = sec.Key("NPU_TENSORFLOW_IMAGE_ID").MustInt(35)
}

func GetGrampusConfig() {
func getModelartsCDConfig() {
sec := Cfg.Section("modelarts-cd")

ModelartsCD.Enabled = sec.Key("ENABLED").MustBool(false)
ModelartsCD.EndPoint = sec.Key("ENDPOINT").MustString("https://modelarts.cn-southwest-228.cdzs.cn")
ModelartsCD.ProjectID = sec.Key("PROJECT_ID").MustString("")
ModelartsCD.AccessKey = sec.Key("ACCESS_KEY").MustString("")
ModelartsCD.SecretKey = sec.Key("SECRET_KEY").MustString("")
ModelartsCD.ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
ModelartsCD.FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("")

getNotebookImageInfos()
getNotebookFlavorInfos()
}

func getGrampusConfig() {
sec := Cfg.Section("grampus")

Grampus.Env = sec.Key("ENV").MustString("TEST")
@@ -1630,6 +1678,26 @@ func ensureLFSDirectory() {
}
}

func getNotebookImageInfos() {
if StImageInfos == nil {
if ModelartsCD.Enabled {
json.Unmarshal([]byte(ModelartsCD.ImageInfos), &StImageInfos)
} else {
json.Unmarshal([]byte(ImageInfos), &StImageInfos)
}
}
}

func getNotebookFlavorInfos() {
if StFlavorInfo == nil {
if ModelartsCD.Enabled {
json.Unmarshal([]byte(ModelartsCD.FlavorInfos), &StFlavorInfo)
} else {
json.Unmarshal([]byte(FlavorInfos), &StFlavorInfo)
}
}
}

// NewServices initializes the services
func NewServices() {
InitDBConfig()


+ 2
- 0
options/locale/locale_en-US.ini View File

@@ -1079,6 +1079,7 @@ balance.total_view = Total Balance
balance.available = Available Balance:
cloudbrain1 = cloudbrain1
cloudbrain2 = cloudbrain2
cdCenter = cd_ai_center
cloudbrain_selection = select cloudbrain
cloudbrain_platform_selection = Select the cloudbrain platform you want to use:
confirm_choice = Confirm
@@ -1213,6 +1214,7 @@ modelarts.infer_job.select_model = Select Model
modelarts.infer_job.boot_file_helper=The startup file is the entry file for your program execution and must end in.py.Such as inference.py, main.py, example/inference.py, case/main.py.
modelarts.infer_job.tooltip = The model has been deleted and cannot be viewed.
modelarts.download_log=Download log file
modelarts.no_node_right = The value of 'Amount of Compute Node' is wrong, you have no right to use the current value of 'Amount of Compute Node'.


debug_task_not_created = Debug task has not been created


+ 2
- 0
options/locale/locale_zh-CN.ini View File

@@ -1080,6 +1080,7 @@ balance.total_view=余额总览
balance.available=可用余额:
cloudbrain1=云脑1
cloudbrain2=云脑2
cdCenter=成都智算中心
intelligent_net=智算网络
cloudbrain_selection=云脑选择
cloudbrain_platform_selection=选择您准备使用的云脑平台:
@@ -1226,6 +1227,7 @@ modelarts.infer_job.select_model = 选择模型
modelarts.infer_job.boot_file_helper=启动文件是您程序执行的入口文件,必须是以.py结尾的文件。比如inference.py、main.py、example/inference.py、case/main.py。
modelarts.infer_job.tooltip = 该模型已删除,无法查看。
modelarts.download_log=下载日志文件
modelarts.no_node_right = 计算节点数的值配置错误,您没有权限使用当前配置的计算节点数。


debug_task_not_created = 未创建过调试任务


+ 139
- 30
routers/api/v1/repo/cloudbrain.go View File

@@ -405,46 +405,83 @@ func CloudbrainDownloadLogFile(ctx *context.Context) {

func CloudbrainGetLog(ctx *context.Context) {
ID := ctx.Params(":id")
startLine := ctx.QueryInt("base_line")
lines := ctx.QueryInt("lines")
endLine := startLine + lines
order := ctx.Query("order")
if order == "asc" {
endLine = startLine
startLine = endLine - lines
if startLine < 0 {
startLine = 0
}
}
job, err := models.GetCloudbrainByID(ID)
if err != nil {
log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
result := getLogFromModelDir(job.JobName, startLine, endLine)
if result == nil {
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
lines := ctx.QueryInt("lines")
baseLine := ctx.Query("base_line")
order := ctx.Query("order")
var result map[string]interface{}
resultPath := "/model"
if job.JobType == string(models.JobTypeInference) {
resultPath = "/result"
}
if baseLine == "" && order == "desc" {
result = getLastLogFromModelDir(job.JobName, lines, resultPath)
} else {
startLine := ctx.QueryInt("base_line")
endLine := startLine + lines
if order == "asc" {
if baseLine == "" {
startLine = 0
endLine = lines
} else {
endLine = startLine
startLine = endLine - lines
if startLine < 0 {
startLine = 0
}
}
}
result = getLogFromModelDir(job.JobName, startLine, endLine, resultPath)
if result == nil {
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
}

re := map[string]interface{}{
"JobID": ID,
"LogFileName": result["FileName"],
"StartLine": startLine,
"EndLine": result["endLine"],
"StartLine": result["StartLine"],
"EndLine": result["EndLine"],
"Content": result["Content"],
"Lines": result["lines"],
"Lines": result["Lines"],
"CanLogDownload": result["FileName"] != "",
}
//result := CloudbrainGetLogByJobId(job.JobID, job.JobName)

ctx.JSON(http.StatusOK, re)
}

func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + "/model"
func getAllLineFromFile(path string) int {
count := 0
reader, err := os.Open(path)
defer reader.Close()
if err == nil {
r := bufio.NewReader(reader)
for {
_, error := r.ReadString('\n')
if error == io.EOF {
log.Info("read file completed.")
break
}
if error != nil {
log.Info("read file error." + error.Error())
break
}
count = count + 1
}
} else {
log.Info("error:" + err.Error())
}
return count
}

func getLastLogFromModelDir(jobName string, lines int, resultPath string) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
if err != nil {
log.Error("query cloudbrain model failed: %v", err)
@@ -454,11 +491,81 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
re := ""
fileName := ""
count := 0
allLines := 0
startLine := 0
for _, file := range files {
if strings.HasSuffix(file.FileName, "log.txt") {
fileName = file.FileName
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
allLines = getAllLineFromFile(path)
startLine = allLines - lines
if startLine < 0 {
startLine = 0
}
count = allLines - startLine
log.Info("path=" + path)
reader, err := os.Open(path)
defer reader.Close()
if err == nil {
r := bufio.NewReader(reader)
for i := 0; i < allLines; i++ {
line, error := r.ReadString('\n')
if error == io.EOF {
log.Info("read file completed.")
break
}
if error != nil {
log.Info("read file error." + error.Error())
break
}
if error == nil {
if i >= startLine {
re = re + line
}
}
}
} else {
log.Info("error:" + err.Error())
}
break
}
}

return map[string]interface{}{
"JobName": jobName,
"Content": re,
"FileName": fileName,
"Lines": count,
"EndLine": allLines,
"StartLine": startLine,
}
}

func getLogFromModelDir(jobName string, startLine int, endLine int, resultPath string) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
if err != nil {
log.Error("query cloudbrain model failed: %v", err)
return nil
}
if startLine == endLine {
return map[string]interface{}{
"JobName": jobName,
"Content": "",
"FileName": "",
"Lines": 0,
"EndLine": startLine,
"StartLine": startLine,
}
}
re := ""
fileName := ""
count := 0
fileEndLine := endLine
for _, file := range files {
if strings.HasSuffix(file.FileName, "log.txt") {
fileName = file.FileName
path := storage.GetMinioPath(jobName+"/model/", file.FileName)
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
log.Info("path=" + path)
reader, err := os.Open(path)
defer reader.Close()
@@ -467,7 +574,6 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
for i := 0; i < endLine; i++ {
line, error := r.ReadString('\n')
log.Info("line=" + line)
fileEndLine = i
if error == io.EOF {
log.Info("read file completed.")
break
@@ -478,11 +584,13 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
}
if error == nil {
if i >= startLine {
fileEndLine = i
re = re + line
count++
}
}
}
fileEndLine = fileEndLine + 1
} else {
log.Info("error:" + err.Error())
}
@@ -491,11 +599,12 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
}

return map[string]interface{}{
"JobName": jobName,
"Content": re,
"FileName": fileName,
"lines": count,
"endLine": fileEndLine,
"JobName": jobName,
"Content": re,
"FileName": fileName,
"Lines": count,
"EndLine": fileEndLine,
"StartLine": startLine,
}
}



+ 11
- 6
routers/repo/cloudbrain.go View File

@@ -2335,7 +2335,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, &form)
return
}

log.Info("Command=" + command)
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"))
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -2471,6 +2472,8 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
ctx.RenderWithErr("Resource specification not available", tpl, &form)
return
}
log.Info("Command=" + command)
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"))
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -2598,7 +2601,7 @@ func getInferenceJobCommand(form auth.CreateCloudBrainInferencForm) (string, err

param += " --modelname" + "=" + form.CkptName

command += "python /code/" + bootFile + param + " > " + cloudbrain.ResultPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile
command += "python /code/" + bootFile + param + " | tee " + cloudbrain.ResultPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile

return command, nil
}
@@ -2627,7 +2630,7 @@ func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) {
}
}

command += "python /code/" + bootFile + param + " | tee " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile
command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile

return command, nil
}
@@ -2672,6 +2675,8 @@ func GetCloudbrainAiCenter(task models.Cloudbrain, ctx *context.Context) string
return ctx.Tr("repo.cloudbrain1")
} else if task.Type == models.TypeCloudBrainTwo {
return ctx.Tr("repo.cloudbrain2")
} else if task.Type == models.TypeCDCenter {
return ctx.Tr("repo.cdCenter")
} else if task.Type == models.TypeC2Net {
return getCutStringAiCenterByAiCenter(task.AiCenter)
}
@@ -2686,7 +2691,7 @@ func getCutStringAiCenterByAiCenter(aiCenter string) string {

}
func GetCloudbrainCluster(task models.Cloudbrain, ctx *context.Context) string {
if task.Type == models.TypeCloudBrainOne || task.Type == models.TypeCloudBrainTwo {
if task.Type == models.TypeCloudBrainOne || task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter {
return ctx.Tr("cloudbrain.resource_cluster_openi")
} else if task.Type == models.TypeC2Net {
return ctx.Tr("cloudbrain.resource_cluster_c2net")
@@ -2773,10 +2778,10 @@ func GetCloudbrainFlavorName(task models.Cloudbrain) (string, error) {
return CloudbrainOneFlavorName, nil
}
}
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeC2Net) && task.FlavorName != "" {
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeC2Net || task.Type == models.TypeCDCenter) && task.FlavorName != "" {
replaceFlavorName := strings.ReplaceAll(task.FlavorName, ":", ":")
return replaceFlavorName, nil
} else if task.Type == models.TypeCloudBrainTwo && task.FlavorName == "" && task.FlavorCode != "" {
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter) && task.FlavorName == "" && task.FlavorCode != "" {
cloudbrainTwoFlavorName := getFlavorNameByFlavorCode(task.FlavorCode)
return cloudbrainTwoFlavorName, nil
} else if task.Type == models.TypeCloudBrainTwo && task.JobType == string(models.JobTypeDebug) && task.FlavorName == "" && task.FlavorCode == "" {


+ 127
- 51
routers/repo/modelarts.go View File

@@ -2,6 +2,7 @@ package repo

import (
"archive/zip"
"code.gitea.io/gitea/modules/modelarts_cd"
"code.gitea.io/gitea/services/cloudbrain/resource"
"encoding/json"
"errors"
@@ -60,18 +61,11 @@ func DebugJobIndex(ctx *context.Context) {
if page <= 0 {
page = 1
}
typeCloudBrain := models.TypeCloudBrainAll
jobTypeNot := false
if listType == models.GPUResource {
typeCloudBrain = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
typeCloudBrain = models.TypeCloudBrainTwo
} else if listType == models.AllResource {
typeCloudBrain = models.TypeCloudBrainAll
} else {
log.Error("listType(%s) error", listType)
ctx.ServerError("listType error", errors.New("listType error"))
return
var computeResource string
if listType != models.AllResource {
computeResource = listType
}

var jobTypes []string
@@ -81,10 +75,11 @@ func DebugJobIndex(ctx *context.Context) {
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: typeCloudBrain,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
RepoID: repo.ID,
ComputeResource: computeResource,
Type: models.TypeCloudBrainAll,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
})
if err != nil {
ctx.ServerError("Get debugjob faild:", err)
@@ -134,6 +129,7 @@ func notebookNewDataPrepare(ctx *context.Context) error {
return err
}
ctx.Data["attachments"] = attachs
ctx.Data["images"] = setting.StImageInfos.ImageInfo

if modelarts.ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &modelarts.ImageInfos)
@@ -256,6 +252,13 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
return
}
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec)

if setting.ModelartsCD.Enabled {
err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
} else {
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
}

if err != nil {
log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
@@ -385,36 +388,16 @@ func setShowSpecBySpecialPoolConfig(ctx *context.Context, findSpec bool, task *m
}
}

func NotebookDebug(ctx *context.Context) {
var jobID = ctx.Params(":jobid")

result, err := modelarts.GetJob(jobID)
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
}

res, err := modelarts.GetJobToken(jobID)
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
}

urls := strings.Split(result.Spec.Annotations.Url, "/")
urlPrefix := result.Spec.Annotations.TargetDomain
for i, url := range urls {
if i > 2 {
urlPrefix += "/" + url
}
}

debugUrl := urlPrefix + "?token=" + res.Token
ctx.Redirect(debugUrl)
}

func NotebookDebug2(ctx *context.Context) {
var err error
var result *models.GetNotebook2Result
task := ctx.Cloudbrain
result, err := modelarts.GetNotebook2(task.JobID)
if task.Type == models.TypeCloudBrainTwo {
result, err = modelarts.GetNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
result, err = modelarts_cd.GetNotebook(task.JobID)
}

if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
@@ -478,7 +461,13 @@ func NotebookRestart(ctx *context.Context) {
Action: models.ActionStart,
}

res, err := modelarts.ManageNotebook2(task.JobID, param)
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}

if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"])
/* 暂不处理再次调试502的场景,详情见方案
@@ -563,7 +552,14 @@ func NotebookStop(ctx *context.Context) {
Action: models.ActionStop,
}

res, err := modelarts.ManageNotebook2(task.JobID, param)
var err error
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}

if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
@@ -613,7 +609,13 @@ func NotebookDel(ctx *context.Context) {
return
}

_, err := modelarts.DelNotebook2(task.JobID)
var err error
if task.Type == models.TypeCloudBrainTwo {
_, err = modelarts.DelNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
_, err = modelarts_cd.DelNotebook(task.JobID)
}

if err != nil {
log.Error("DelNotebook2(%s) failed:%v", task.JobName, err.Error())
if strings.Contains(err.Error(), modelarts.NotebookNotFound) || strings.Contains(err.Error(), modelarts.NotebookNoPermission) || strings.Contains(err.Error(), modelarts.NotebookInvalid) {
@@ -764,6 +766,8 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
ctx.Data["WaitCount"] = waitCount

setMultiNodeIfConfigureMatch(ctx)

return nil
}

@@ -777,6 +781,18 @@ func prepareCloudbrainTwoTrainSpecs(ctx *context.Context) {
ctx.Data["Specs"] = noteBookSpecs
}

func setMultiNodeIfConfigureMatch(ctx *context.Context) {
modelarts.InitMultiNode()
if modelarts.MultiNodeConfig != nil {
for _, info := range modelarts.MultiNodeConfig.Info {
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, ctx.User.ID); isInOrg {
ctx.Data["WorkNode"] = info.Node
break
}
}
}
}

func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) {
modelarts.InitSpecialPool()

@@ -885,6 +901,7 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "")
ctx.Data["WaitCount"] = waitCount
setMultiNodeIfConfigureMatch(ctx)

return nil
}
@@ -1106,6 +1123,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
VersionCount := modelarts.VersionCountOne
EngineName := form.EngineName

errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
return
}

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -1344,6 +1368,48 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

func checkMultiNode(userId int64, serverNum int) string {
if serverNum == 1 {
return ""
}
modelarts.InitMultiNode()
var isServerNumValid = false
if modelarts.MultiNodeConfig != nil {
for _, info := range modelarts.MultiNodeConfig.Info {
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg {
if isInNodes(info.Node, serverNum) {
isServerNumValid = true
break
}

}
}
}
if isServerNumValid {
return ""
} else {
return "repo.modelarts.no_node_right"
}
}
func checkInferenceJobMultiNode(userId int64, serverNum int) string {
if serverNum == 1 {
return ""
}

return "repo.modelarts.no_node_right"

}

func isInNodes(nodes []int, num int) bool {
for _, node := range nodes {
if node == num {
return true
}
}
return false

}

func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, string) {
userImageUrl := ""
userCommand := ""
@@ -1378,6 +1444,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.Data["PageIsTrainJob"] = true
var jobID = ctx.Params(":jobid")

errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
versionErrorDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
return
}

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -1717,10 +1790,6 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
return errors.New("启动文件必须是python文件")
}

if form.WorkServerNumber > 2 || form.WorkServerNumber < 1 {
log.Error("the WorkServerNumber(%d) must be in (1,2)", form.WorkServerNumber)
return errors.New("计算节点数必须在1-2之间")
}
if form.BranchName == "" {
log.Error("the branch must not be null!", form.BranchName)
return errors.New("代码分支不能为空!")
@@ -2025,6 +2094,13 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ckptUrl := "/" + form.TrainUrl + form.CkptName
log.Info("ckpt url:" + ckptUrl)

errStr := checkInferenceJobMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
return
}

count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -2252,7 +2328,7 @@ func checkModelArtsSpecialPool(ctx *context.Context, flavorCode string, jobType
if !isMatchPool {
isMatchSpec := false
if jobType == string(models.JobTypeDebug) {
for _, flavor := range modelarts.FlavorInfos.FlavorInfo {
for _, flavor := range setting.StFlavorInfo.FlavorInfo {
if flavor.Value == flavorCode {
isMatchSpec = true
break


+ 39
- 15
templates/repo/cloudbrain/benchmark/show.tmpl View File

@@ -256,8 +256,9 @@
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item"
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item" data-tab="second{{$k}}"
onclick="loadLog({{.VersionName}})">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item log_bottom" data-tab="second{{$k}}"
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
@@ -528,19 +529,42 @@
</div>
<div class="ui tab" data-tab="second{{$k}}">
<div>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log" id="log{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>

</div>

</div>
<a id="{{.VersionName}}-log-down"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}'
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>
</a>
</div>
<div
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<span>
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;"
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a>
</span>
<span class="log-info-{{.VersionName}}">
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;"
class="log_bottom" data-version="{{.VersionName}}"><i
class="icon-to-bottom"></i></a>
</span>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>
</div>
</div>

</div>
</div>


+ 42
- 4
templates/repo/cloudbrain/inference/show.tmpl View File

@@ -228,7 +228,7 @@
</h4>
{{with .task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/cloudbrain/inference-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}">
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}">
<div class="active title padding0">
<div class="according-panel-heading">
@@ -264,7 +264,8 @@
data-tab="first">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item" data-tab="second"
onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a>
<a class="item log_bottom" data-tab="third"
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item load-model-file" data-tab="four"
data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/inference-job/{{.JobID}}/result_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
@@ -524,7 +525,7 @@
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log" id="log{{.VersionName}}"
<div class="ui attached"
style="height: 390px !important; overflow: auto;">
<input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}">
<input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}">
@@ -537,7 +538,44 @@

</div>

<div class="ui tab" data-tab="third">
<div>
<a id="{{.VersionName}}-log-down"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}'
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>
</a>
</div>
<div
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<span>
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;"
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a>
</span>
<span class="log-info-{{.VersionName}}">
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;"
class="log_bottom" data-version="{{.VersionName}}"><i
class="icon-to-bottom"></i></a>
</span>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>
</div>
</div>

<div class="ui tab" data-tab="four">
<input type="hidden" name="model{{.VersionName}}" value="-1">


+ 17
- 1
templates/repo/modelarts/trainjob/new.tmpl View File

@@ -294,8 +294,24 @@
id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1"
readonly>
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_id">
<select class="ui dropdown width" style='width: 100%;' name="work_server_id">
{{if .WorkNode}}
{{range .WorkNode}}

{{if $.work_server_number}}
{{if eq . $.work_server_number }}
<option name="server_id" selected value="{{.}}">{{.}}</option>
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{end}}

{{else}}
<option name="server_id" value="1">1</option>
{{end}}
</select>
</div>



Loading…
Cancel
Save