# Conflicts: # routers/repo/cloudbrain.go # routers/repo/modelarts.gores-manage-v2
@@ -25,7 +25,8 @@ type ModelArtsJobStatus string | |||||
const ( | const ( | ||||
TypeCloudBrainOne int = iota | TypeCloudBrainOne int = iota | ||||
TypeCloudBrainTwo | TypeCloudBrainTwo | ||||
TypeC2Net //智算网络 | |||||
TypeC2Net //智算网络 | |||||
TypeCDCenter //成都智算中心 | |||||
TypeCloudBrainAll = -1 | TypeCloudBrainAll = -1 | ||||
) | ) | ||||
@@ -601,37 +602,17 @@ type ResourceSpec struct { | |||||
ShareMemMiB int `json:"shareMemMiB"` | ShareMemMiB int `json:"shareMemMiB"` | ||||
} | } | ||||
type FlavorInfos struct { | |||||
FlavorInfo []*FlavorInfo `json:"flavor_info"` | |||||
} | |||||
type FlavorInfo struct { | |||||
Id int `json:"id"` | |||||
Value string `json:"value"` | |||||
Desc string `json:"desc"` | |||||
} | |||||
type SpecialPools struct { | type SpecialPools struct { | ||||
Pools []*SpecialPool `json:"pools"` | Pools []*SpecialPool `json:"pools"` | ||||
} | } | ||||
type SpecialPool struct { | type SpecialPool struct { | ||||
Org string `json:"org"` | |||||
Type string `json:"type"` | |||||
IsExclusive bool `json:"isExclusive"` | |||||
Pool []*GpuInfo `json:"pool"` | |||||
JobType []string `json:"jobType"` | |||||
ResourceSpec []*ResourceSpec `json:"resourceSpecs"` | |||||
Flavor []*FlavorInfo `json:"flavor"` | |||||
} | |||||
type ImageInfosModelArts struct { | |||||
ImageInfo []*ImageInfoModelArts `json:"image_info"` | |||||
} | |||||
type ImageInfoModelArts struct { | |||||
Id string `json:"id"` | |||||
Value string `json:"value"` | |||||
Desc string `json:"desc"` | |||||
Org string `json:"org"` | |||||
Type string `json:"type"` | |||||
IsExclusive bool `json:"isExclusive"` | |||||
Pool []*GpuInfo `json:"pool"` | |||||
JobType []string `json:"jobType"` | |||||
ResourceSpec []*ResourceSpec `json:"resourceSpecs"` | |||||
Flavor []*setting.FlavorInfo `json:"flavor"` | |||||
} | } | ||||
type PoolInfos struct { | type PoolInfos struct { | ||||
@@ -737,6 +718,17 @@ type CreateNotebook2Params struct { | |||||
Volume VolumeReq `json:"volume"` | Volume VolumeReq `json:"volume"` | ||||
} | } | ||||
type CreateNotebookWithoutPoolParams struct { | |||||
JobName string `json:"name"` | |||||
Description string `json:"description"` | |||||
Duration int64 `json:"duration"` //ms | |||||
Feature string `json:"feature"` | |||||
Flavor string `json:"flavor"` | |||||
ImageID string `json:"image_id"` | |||||
WorkspaceID string `json:"workspace_id"` | |||||
Volume VolumeReq `json:"volume"` | |||||
} | |||||
type VolumeReq struct { | type VolumeReq struct { | ||||
Capacity int `json:"capacity"` | Capacity int `json:"capacity"` | ||||
Category string `json:"category"` | Category string `json:"category"` | ||||
@@ -960,6 +952,7 @@ type NotebookGetJobTokenResult struct { | |||||
} | } | ||||
type NotebookDelResult struct { | type NotebookDelResult struct { | ||||
NotebookResult | |||||
InstanceID string `json:"instance_id"` | InstanceID string `json:"instance_id"` | ||||
} | } | ||||
@@ -1486,12 +1479,6 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||||
) | ) | ||||
} | } | ||||
if len(opts.ComputeResource) > 0 { | |||||
cond = cond.And( | |||||
builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource}, | |||||
) | |||||
} | |||||
if len(opts.JobTypes) > 0 { | if len(opts.JobTypes) > 0 { | ||||
if opts.JobTypeNot { | if opts.JobTypeNot { | ||||
cond = cond.And( | cond = cond.And( | ||||
@@ -1511,7 +1498,7 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||||
if (opts.Cluster) != "" { | if (opts.Cluster) != "" { | ||||
if opts.Cluster == "resource_cluster_openi" { | if opts.Cluster == "resource_cluster_openi" { | ||||
cond = cond.And( | cond = cond.And( | ||||
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}), | |||||
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}, builder.Eq{"cloudbrain.type": TypeCDCenter}), | |||||
) | ) | ||||
} | } | ||||
if opts.Cluster == "resource_cluster_c2net" { | if opts.Cluster == "resource_cluster_c2net" { | ||||
@@ -1977,7 +1964,7 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy | |||||
func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) { | func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) { | ||||
count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting). | count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting). | ||||
And("job_type = ? and user_id = ? and type = ?", JobTypeDebug, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) | |||||
And("job_type = ? and user_id = ? and type in (?,?)", JobTypeDebug, userID, TypeCloudBrainTwo, TypeCDCenter).Count(new(Cloudbrain)) | |||||
return int(count), err | return int(count), err | ||||
} | } | ||||
@@ -20,7 +20,7 @@ import ( | |||||
const ( | const ( | ||||
//Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` | //Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` | ||||
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` | //CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` | ||||
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` | |||||
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh | tee /model/benchmark-log.txt;echo "end benchmark"` | |||||
CodeMountPath = "/code" | CodeMountPath = "/code" | ||||
DataSetMountPath = "/dataset" | DataSetMountPath = "/dataset" | ||||
ModelMountPath = "/model" | ModelMountPath = "/model" | ||||
@@ -30,8 +30,8 @@ const ( | |||||
Snn4imagenetMountPath = "/snn4imagenet" | Snn4imagenetMountPath = "/snn4imagenet" | ||||
BrainScoreMountPath = "/brainscore" | BrainScoreMountPath = "/brainscore" | ||||
TaskInfoName = "/taskInfo" | TaskInfoName = "/taskInfo" | ||||
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s'` | |||||
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s'` | |||||
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s' | tee /model/benchmark-log.txt` | |||||
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s' | tee /model/benchmark-log.txt` | |||||
SubTaskName = "task1" | SubTaskName = "task1" | ||||
@@ -30,8 +30,8 @@ const ( | |||||
var ( | var ( | ||||
poolInfos *models.PoolInfos | poolInfos *models.PoolInfos | ||||
FlavorInfos *models.FlavorInfos | |||||
ImageInfos *models.ImageInfosModelArts | |||||
FlavorInfos *setting.StFlavorInfos | |||||
ImageInfos *setting.StImageInfosModelArts | |||||
SpecialPools *models.SpecialPools | SpecialPools *models.SpecialPools | ||||
) | ) | ||||
@@ -1,6 +1,7 @@ | |||||
package modelarts | package modelarts | ||||
import ( | import ( | ||||
"code.gitea.io/gitea/modules/modelarts_cd" | |||||
"encoding/json" | "encoding/json" | ||||
"errors" | "errors" | ||||
"fmt" | "fmt" | ||||
@@ -68,10 +69,9 @@ const ( | |||||
var ( | var ( | ||||
poolInfos *models.PoolInfos | poolInfos *models.PoolInfos | ||||
FlavorInfos *models.FlavorInfos | |||||
ImageInfos *models.ImageInfosModelArts | |||||
TrainFlavorInfos *Flavor | TrainFlavorInfos *Flavor | ||||
SpecialPools *models.SpecialPools | |||||
SpecialPools *models.SpecialPools | |||||
MultiNodeConfig *MultiNodes | |||||
) | ) | ||||
type GenerateTrainJobReq struct { | type GenerateTrainJobReq struct { | ||||
@@ -167,6 +167,14 @@ type ResourcePool struct { | |||||
} `json:"resource_pool"` | } `json:"resource_pool"` | ||||
} | } | ||||
type MultiNodes struct{ | |||||
Info []OrgMultiNode `json:"multinode"` | |||||
} | |||||
// OrgMultiNode pairs an organization name with a list of integers read from
// the "node" JSON key (presumably the node counts that organization may
// request; confirm against the callers).
type OrgMultiNode struct {
	Org  string `json:"org"`
	Node []int  `json:"node"`
}
// type Parameter struct { | // type Parameter struct { | ||||
// Label string `json:"label"` | // Label string `json:"label"` | ||||
// Value string `json:"value"` | // Value string `json:"value"` | ||||
@@ -753,11 +761,7 @@ func GetNotebookImageName(imageId string) (string, error) { | |||||
var validImage = false | var validImage = false | ||||
var imageName = "" | var imageName = "" | ||||
if ImageInfos == nil { | |||||
json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos) | |||||
} | |||||
for _, imageInfo := range ImageInfos.ImageInfo { | |||||
for _, imageInfo := range setting.StImageInfos.ImageInfo { | |||||
if imageInfo.Id == imageId { | if imageInfo.Id == imageId { | ||||
validImage = true | validImage = true | ||||
imageName = imageInfo.Value | imageName = imageInfo.Value | ||||
@@ -778,6 +782,13 @@ func InitSpecialPool() { | |||||
} | } | ||||
} | } | ||||
func InitMultiNode(){ | |||||
if MultiNodeConfig ==nil && setting.ModelArtsMultiNode!=""{ | |||||
json.Unmarshal([]byte(setting.ModelArtsMultiNode), &MultiNodeConfig) | |||||
} | |||||
} | |||||
func HandleTrainJobInfo(task *models.Cloudbrain) error { | func HandleTrainJobInfo(task *models.Cloudbrain) error { | ||||
result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | ||||
@@ -814,8 +825,13 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error { | |||||
} | } | ||||
func HandleNotebookInfo(task *models.Cloudbrain) error { | func HandleNotebookInfo(task *models.Cloudbrain) error { | ||||
result, err := GetNotebook2(task.JobID) | |||||
var result *models.GetNotebook2Result | |||||
var err error | |||||
if task.Type == models.TypeCloudBrainTwo { | |||||
result, err = GetNotebook2(task.JobID) | |||||
} else if task.Type == models.TypeCDCenter { | |||||
result, err = modelarts_cd.GetNotebook(task.JobID) | |||||
} | |||||
if err != nil { | if err != nil { | ||||
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err) | log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err) | ||||
return err | return err | ||||
@@ -0,0 +1,214 @@ | |||||
package modelarts_cd | |||||
import ( | |||||
"errors" | |||||
"strconv" | |||||
"strings" | |||||
"code.gitea.io/gitea/models" | |||||
"code.gitea.io/gitea/modules/context" | |||||
"code.gitea.io/gitea/modules/log" | |||||
"code.gitea.io/gitea/modules/notification" | |||||
"code.gitea.io/gitea/modules/setting" | |||||
"code.gitea.io/gitea/modules/timeutil" | |||||
) | |||||
// Constants used by the modelarts_cd notebook and train-job helpers.
const (
	// notebook
	storageTypeOBS = "obs"
	// autoStopDuration and autoStopDurationMs are both four hours; the Ms
	// variant is the same span in milliseconds (the other is presumably in
	// seconds).
	autoStopDuration               = 4 * 60 * 60
	autoStopDurationMs             = 4 * 60 * 60 * 1000
	MORDELART_USER_IMAGE_ENGINE_ID = -1
	DataSetMountPath               = "/home/ma-user/work"
	NotebookEnv                    = "Python3"
	NotebookType                   = "Ascend"
	FlavorInfo                     = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

	// train-job
	CodePath          = "/code/"
	OutputPath        = "/output/"
	ResultPath        = "/result/"
	LogPath           = "/log/"
	JobPath           = "/job/"
	OrderDesc         = "desc" // query downward
	OrderAsc          = "asc"  // query upward
	Lines             = 500
	TrainUrl          = "train_url"
	DataUrl           = "data_url"
	MultiDataUrl      = "multi_data_url"
	ResultUrl         = "result_url"
	CkptUrl           = "ckpt_url"
	DeviceTarget      = "device_target"
	Ascend            = "Ascend"
	PerPage           = 10
	IsLatestVersion   = "1"
	NotLatestVersion  = "0"
	VersionCountOne   = 1
	SortByCreateTime  = "create_time"
	ConfigTypeCustom  = "custom"
	TotalVersionCount = 1
)
var () | |||||
// VersionInfo mirrors a JSON document whose "version" key holds a list of
// entries, each carrying an id, a value and a url.
type VersionInfo struct {
	Version []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
		Url   string `json:"url"`
	} `json:"version"`
}
// Flavor mirrors a JSON document whose "flavor" key holds a list of
// code/value pairs.
type Flavor struct {
	Info []struct {
		Code  string `json:"code"`
		Value string `json:"value"`
	} `json:"flavor"`
}
// Engine mirrors a JSON document whose "engine" key holds a list of
// id/value pairs.
type Engine struct {
	Info []struct {
		ID    int    `json:"id"`
		Value string `json:"value"`
	} `json:"engine"`
}
// ResourcePool mirrors a JSON document whose "resource_pool" key holds a list
// of id/value pairs; unlike Engine, the id here is a string.
type ResourcePool struct {
	Info []struct {
		ID    string `json:"id"`
		Value string `json:"value"`
	} `json:"resource_pool"`
}
// Parameters mirrors a JSON document whose "parameter" key holds a list of
// label/value pairs.
type Parameters struct {
	Parameter []struct {
		Label string `json:"label"`
		Value string `json:"value"`
	} `json:"parameter"`
}
func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error { | |||||
imageName, err := GetNotebookImageName(imageId) | |||||
if err != nil { | |||||
log.Error("GetNotebookImageName failed: %v", err.Error()) | |||||
return err | |||||
} | |||||
createTime := timeutil.TimeStampNow() | |||||
jobResult, err := createNotebook(models.CreateNotebookWithoutPoolParams{ | |||||
JobName: jobName, | |||||
Description: description, | |||||
Flavor: flavor, | |||||
Duration: autoStopDurationMs, | |||||
ImageID: imageId, | |||||
Feature: models.NotebookFeature, | |||||
Volume: models.VolumeReq{ | |||||
Capacity: setting.Capacity, | |||||
Category: models.EVSCategory, | |||||
Ownership: models.ManagedOwnership, | |||||
}, | |||||
WorkspaceID: "0", | |||||
}) | |||||
if err != nil { | |||||
log.Error("createNotebook failed: %v", err.Error()) | |||||
if strings.HasPrefix(err.Error(), UnknownErrorPrefix) { | |||||
log.Info("(%s)unknown error, set temp status", displayJobName) | |||||
errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
JobID: models.TempJobId, | |||||
VersionID: models.TempVersionId, | |||||
Status: models.TempJobStatus, | |||||
Type: models.TypeCDCenter, | |||||
JobName: jobName, | |||||
JobType: string(models.JobTypeDebug), | |||||
}) | |||||
if errTemp != nil { | |||||
log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
return errTemp | |||||
} | |||||
} | |||||
return err | |||||
} | |||||
task := &models.Cloudbrain{ | |||||
Status: jobResult.Status, | |||||
UserID: ctx.User.ID, | |||||
RepoID: ctx.Repo.Repository.ID, | |||||
JobID: jobResult.ID, | |||||
JobName: jobName, | |||||
FlavorCode: flavor, | |||||
DisplayJobName: displayJobName, | |||||
JobType: string(models.JobTypeDebug), | |||||
Type: models.TypeCDCenter, | |||||
Uuid: uuid, | |||||
ComputeResource: models.NPUResource, | |||||
Image: imageName, | |||||
Description: description, | |||||
CreatedUnix: createTime, | |||||
UpdatedUnix: createTime, | |||||
} | |||||
err = models.CreateCloudbrain(task) | |||||
if err != nil { | |||||
return err | |||||
} | |||||
stringId := strconv.FormatInt(task.ID, 10) | |||||
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask) | |||||
return nil | |||||
} | |||||
func GetNotebookImageName(imageId string) (string, error) { | |||||
var validImage = false | |||||
var imageName = "" | |||||
for _, imageInfo := range setting.StImageInfos.ImageInfo { | |||||
if imageInfo.Id == imageId { | |||||
validImage = true | |||||
imageName = imageInfo.Value | |||||
} | |||||
} | |||||
if !validImage { | |||||
log.Error("the image id(%s) is invalid", imageId) | |||||
return imageName, errors.New("the image id is invalid") | |||||
} | |||||
return imageName, nil | |||||
} | |||||
/* | |||||
func HandleNotebookInfo(task *models.Cloudbrain) error { | |||||
result, err := GetNotebook(task.JobID) | |||||
if err != nil { | |||||
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err) | |||||
return err | |||||
} | |||||
if result != nil { | |||||
oldStatus := task.Status | |||||
task.Status = result.Status | |||||
if task.StartTime == 0 && result.Lease.UpdateTime > 0 { | |||||
task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) | |||||
} | |||||
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
task.EndTime = timeutil.TimeStampNow() | |||||
} | |||||
task.CorrectCreateUnix() | |||||
task.ComputeAndSetDuration() | |||||
if oldStatus != task.Status { | |||||
notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
} | |||||
if task.FlavorCode == "" { | |||||
task.FlavorCode = result.Flavor | |||||
} | |||||
err = models.UpdateJob(task) | |||||
if err != nil { | |||||
log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) | |||||
return err | |||||
} | |||||
} | |||||
return nil | |||||
} | |||||
*/ |
@@ -0,0 +1,220 @@ | |||||
package modelarts_cd | |||||
import ( | |||||
"bytes" | |||||
"code.gitea.io/gitea/modules/modelarts_gateway/core" | |||||
"crypto/tls" | |||||
"encoding/json" | |||||
"fmt" | |||||
"io/ioutil" | |||||
"net/http" | |||||
"strconv" | |||||
"time" | |||||
"code.gitea.io/gitea/models" | |||||
"code.gitea.io/gitea/modules/log" | |||||
"code.gitea.io/gitea/modules/setting" | |||||
) | |||||
var (
	// httpClient is the shared client built lazily by getHttpClient.
	httpClient *http.Client
	// HOST and TOKEN are declared here but are not referenced by the request
	// helpers visible in this file.
	HOST  string
	TOKEN string
)
const (
	// errorCodeExceedLimit is the error code createNotebook maps to a
	// quota-exceeded message.
	errorCodeExceedLimit = "ModelArts.0118"

	// notebook 2.0: path segment appended after /v1/{project id}.
	urlNotebook2 = "/notebooks"

	// error codes
	modelartsIllegalToken = "ModelArts.6401"
	NotebookNotFound      = "ModelArts.6404"
	NotebookNoPermission  = "ModelArts.6407"
	NotebookInvalid       = "ModelArts.6400"

	// UnknownErrorPrefix marks error texts that callers treat as an unknown
	// outcome.
	UnknownErrorPrefix = "UNKNOWN:"
)
func getHttpClient() *http.Client { | |||||
if httpClient == nil { | |||||
httpClient = &http.Client{ | |||||
Timeout: 30 * time.Second, | |||||
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, | |||||
} | |||||
} | |||||
return httpClient | |||||
} | |||||
func GetNotebook(jobID string) (*models.GetNotebook2Result, error) { | |||||
var result models.GetNotebook2Result | |||||
client := getHttpClient() | |||||
s := core.Signer{ | |||||
Key: setting.ModelartsCD.AccessKey, | |||||
Secret: setting.ModelartsCD.SecretKey, | |||||
} | |||||
r, _ := http.NewRequest(http.MethodGet, | |||||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID, | |||||
nil) | |||||
r.Header.Add("content-type", "application/json") | |||||
s.Sign(r) | |||||
resp, err := client.Do(r) | |||||
if err != nil { | |||||
log.Error("client.Do failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||||
} | |||||
defer resp.Body.Close() | |||||
body, err := ioutil.ReadAll(resp.Body) | |||||
if err != nil { | |||||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||||
} | |||||
err = json.Unmarshal(body, &result) | |||||
if err != nil { | |||||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||||
} | |||||
if len(result.ErrorCode) != 0 { | |||||
log.Error("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
return &result, fmt.Errorf("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
} | |||||
return &result, nil | |||||
} | |||||
func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||||
var result models.NotebookActionResult | |||||
client := getHttpClient() | |||||
s := core.Signer{ | |||||
Key: setting.ModelartsCD.AccessKey, | |||||
Secret: setting.ModelartsCD.SecretKey, | |||||
} | |||||
r, _ := http.NewRequest(http.MethodPost, | |||||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID+"/"+param.Action+"?duration="+strconv.Itoa(autoStopDurationMs), | |||||
nil) | |||||
r.Header.Add("content-type", "application/json") | |||||
s.Sign(r) | |||||
resp, err := client.Do(r) | |||||
if err != nil { | |||||
log.Error("client.Do failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||||
} | |||||
defer resp.Body.Close() | |||||
body, err := ioutil.ReadAll(resp.Body) | |||||
if err != nil { | |||||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||||
} | |||||
err = json.Unmarshal(body, &result) | |||||
if err != nil { | |||||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||||
} | |||||
if len(result.ErrorCode) != 0 { | |||||
log.Error("ManageNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
return &result, fmt.Errorf("ManageNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
} | |||||
return &result, nil | |||||
} | |||||
func DelNotebook(jobID string) (*models.NotebookDelResult, error) { | |||||
var result models.NotebookDelResult | |||||
client := getHttpClient() | |||||
s := core.Signer{ | |||||
Key: setting.ModelartsCD.AccessKey, | |||||
Secret: setting.ModelartsCD.SecretKey, | |||||
} | |||||
r, _ := http.NewRequest(http.MethodDelete, | |||||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID, | |||||
nil) | |||||
r.Header.Add("content-type", "application/json") | |||||
s.Sign(r) | |||||
resp, err := client.Do(r) | |||||
if err != nil { | |||||
log.Error("client.Do failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||||
} | |||||
defer resp.Body.Close() | |||||
body, err := ioutil.ReadAll(resp.Body) | |||||
if err != nil { | |||||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||||
} | |||||
err = json.Unmarshal(body, &result) | |||||
if err != nil { | |||||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||||
} | |||||
if len(result.ErrorCode) != 0 { | |||||
log.Error("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
} | |||||
return &result, nil | |||||
} | |||||
func createNotebook(createJobParams models.CreateNotebookWithoutPoolParams) (*models.CreateNotebookResult, error) { | |||||
var result models.CreateNotebookResult | |||||
client := getHttpClient() | |||||
s := core.Signer{ | |||||
Key: setting.ModelartsCD.AccessKey, | |||||
Secret: setting.ModelartsCD.SecretKey, | |||||
} | |||||
req, _ := json.Marshal(createJobParams) | |||||
r, _ := http.NewRequest(http.MethodPost, | |||||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2, | |||||
ioutil.NopCloser(bytes.NewBuffer(req))) | |||||
r.Header.Add("content-type", "application/json") | |||||
s.Sign(r) | |||||
resp, err := client.Do(r) | |||||
if err != nil { | |||||
log.Error("client.Do failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||||
} | |||||
defer resp.Body.Close() | |||||
body, err := ioutil.ReadAll(resp.Body) | |||||
if err != nil { | |||||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||||
} | |||||
err = json.Unmarshal(body, &result) | |||||
if err != nil { | |||||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||||
return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error()) | |||||
} | |||||
if len(result.ErrorCode) != 0 { | |||||
log.Error("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
if result.ErrorCode == errorCodeExceedLimit { | |||||
result.ErrorMsg = "所选规格使用数量已超过最大配额限制。" | |||||
} | |||||
return &result, fmt.Errorf("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
} | |||||
return &result, nil | |||||
} |
@@ -0,0 +1,42 @@ | |||||
// based on https://github.com/golang/go/blob/master/src/net/url/url.go | |||||
// Copyright 2009 The Go Authors. All rights reserved. | |||||
// Use of this source code is governed by a BSD-style | |||||
// license that can be found in the LICENSE file. | |||||
package core | |||||
// shouldEscape reports whether byte c must be percent-encoded. ASCII letters,
// digits and the four characters '_', '-', '~' and '.' are left as-is; every
// other byte is escaped.
func shouldEscape(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z', '0' <= c && c <= '9':
		return false
	case c == '_', c == '-', c == '~', c == '.':
		return false
	}
	return true
}
func escape(s string) string { | |||||
hexCount := 0 | |||||
for i := 0; i < len(s); i++ { | |||||
c := s[i] | |||||
if shouldEscape(c) { | |||||
hexCount++ | |||||
} | |||||
} | |||||
if hexCount == 0 { | |||||
return s | |||||
} | |||||
t := make([]byte, len(s)+2*hexCount) | |||||
j := 0 | |||||
for i := 0; i < len(s); i++ { | |||||
switch c := s[i]; { | |||||
case shouldEscape(c): | |||||
t[j] = '%' | |||||
t[j+1] = "0123456789ABCDEF"[c>>4] | |||||
t[j+2] = "0123456789ABCDEF"[c&15] | |||||
j += 3 | |||||
default: | |||||
t[j] = s[i] | |||||
j++ | |||||
} | |||||
} | |||||
return string(t) | |||||
} |
@@ -0,0 +1,208 @@ | |||||
// HWS API Gateway Signature | |||||
// based on https://github.com/datastream/aws/blob/master/signv4.go | |||||
// Copyright (c) 2014, Xianjie | |||||
package core | |||||
import ( | |||||
"bytes" | |||||
"crypto/hmac" | |||||
"crypto/sha256" | |||||
"fmt" | |||||
"io/ioutil" | |||||
"net/http" | |||||
"sort" | |||||
"strings" | |||||
"time" | |||||
) | |||||
// Constants used when signing requests.
const (
	// BasicDateFormat is the time layout used for the X-Sdk-Date header.
	BasicDateFormat = "20060102T150405Z"
	// Algorithm is the scheme name placed in the Authorization header and in
	// the string to sign.
	Algorithm = "SDK-HMAC-SHA256"

	// Header names read or written by the signer.
	HeaderXDate         = "X-Sdk-Date"
	HeaderHost          = "host"
	HeaderAuthorization = "Authorization"
	HeaderContentSha256 = "X-Sdk-Content-Sha256"
)
// hmacsha256 returns the HMAC-SHA256 of data keyed with key.
func hmacsha256(key []byte, data string) ([]byte, error) {
	mac := hmac.New(sha256.New, key)
	_, err := mac.Write([]byte(data))
	if err != nil {
		return nil, err
	}
	return mac.Sum(nil), nil
}
// Build a CanonicalRequest from a regular request string | |||||
// | |||||
// CanonicalRequest = | |||||
// HTTPRequestMethod + '\n' + | |||||
// CanonicalURI + '\n' + | |||||
// CanonicalQueryString + '\n' + | |||||
// CanonicalHeaders + '\n' + | |||||
// SignedHeaders + '\n' + | |||||
// HexEncode(Hash(RequestPayload)) | |||||
func CanonicalRequest(r *http.Request, signedHeaders []string) (string, error) { | |||||
var hexencode string | |||||
var err error | |||||
if hex := r.Header.Get(HeaderContentSha256); hex != "" { | |||||
hexencode = hex | |||||
} else { | |||||
data, err := RequestPayload(r) | |||||
if err != nil { | |||||
return "", err | |||||
} | |||||
hexencode, err = HexEncodeSHA256Hash(data) | |||||
if err != nil { | |||||
return "", err | |||||
} | |||||
} | |||||
return fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", r.Method, CanonicalURI(r), CanonicalQueryString(r), CanonicalHeaders(r, signedHeaders), strings.Join(signedHeaders, ";"), hexencode), err | |||||
} | |||||
// CanonicalURI returns request uri | |||||
func CanonicalURI(r *http.Request) string { | |||||
pattens := strings.Split(r.URL.Path, "/") | |||||
var uri []string | |||||
for _, v := range pattens { | |||||
uri = append(uri, escape(v)) | |||||
} | |||||
urlpath := strings.Join(uri, "/") | |||||
if len(urlpath) == 0 || urlpath[len(urlpath)-1] != '/' { | |||||
urlpath = urlpath + "/" | |||||
} | |||||
return urlpath | |||||
} | |||||
// CanonicalQueryString | |||||
func CanonicalQueryString(r *http.Request) string { | |||||
var keys []string | |||||
query := r.URL.Query() | |||||
for key := range query { | |||||
keys = append(keys, key) | |||||
} | |||||
sort.Strings(keys) | |||||
var a []string | |||||
for _, key := range keys { | |||||
k := escape(key) | |||||
sort.Strings(query[key]) | |||||
for _, v := range query[key] { | |||||
kv := fmt.Sprintf("%s=%s", k, escape(v)) | |||||
a = append(a, kv) | |||||
} | |||||
} | |||||
queryStr := strings.Join(a, "&") | |||||
r.URL.RawQuery = queryStr | |||||
return queryStr | |||||
} | |||||
// CanonicalHeaders | |||||
func CanonicalHeaders(r *http.Request, signerHeaders []string) string { | |||||
var a []string | |||||
header := make(map[string][]string) | |||||
for k, v := range r.Header { | |||||
header[strings.ToLower(k)] = v | |||||
} | |||||
for _, key := range signerHeaders { | |||||
value := header[key] | |||||
if strings.EqualFold(key, HeaderHost) { | |||||
value = []string{r.Host} | |||||
} | |||||
sort.Strings(value) | |||||
for _, v := range value { | |||||
a = append(a, key+":"+strings.TrimSpace(v)) | |||||
} | |||||
} | |||||
return fmt.Sprintf("%s\n", strings.Join(a, "\n")) | |||||
} | |||||
// SignedHeaders returns the lower-cased names of all headers present on r,
// sorted alphabetically. The result is nil when r has no headers.
func SignedHeaders(r *http.Request) []string {
	var names []string
	for name := range r.Header {
		lowered := strings.ToLower(name)
		names = append(names, lowered)
	}
	sort.Strings(names)
	return names
}
// RequestPayload reads and returns the whole request body. Because reading
// consumes the body, r.Body is replaced with a fresh reader over the same
// bytes so the request can still be sent. A request without a body yields an
// empty slice.
func RequestPayload(r *http.Request) ([]byte, error) {
	if r.Body == nil {
		return []byte(""), nil
	}

	payload, err := ioutil.ReadAll(r.Body)
	if err != nil {
		return []byte(""), err
	}

	r.Body = ioutil.NopCloser(bytes.NewBuffer(payload))
	return payload, err
}
// Create a "String to Sign". | |||||
func StringToSign(canonicalRequest string, t time.Time) (string, error) { | |||||
hash := sha256.New() | |||||
_, err := hash.Write([]byte(canonicalRequest)) | |||||
if err != nil { | |||||
return "", err | |||||
} | |||||
return fmt.Sprintf("%s\n%s\n%x", | |||||
Algorithm, t.UTC().Format(BasicDateFormat), hash.Sum(nil)), nil | |||||
} | |||||
// Create the HWS Signature. | |||||
func SignStringToSign(stringToSign string, signingKey []byte) (string, error) { | |||||
hm, err := hmacsha256(signingKey, stringToSign) | |||||
return fmt.Sprintf("%x", hm), err | |||||
} | |||||
// HexEncodeSHA256Hash returns the lower-case hex encoding of the SHA-256
// digest of body. A nil body is hashed as empty input. The error result is
// always nil; it is kept for signature compatibility.
func HexEncodeSHA256Hash(body []byte) (string, error) {
	digest := sha256.Sum256(body)
	return fmt.Sprintf("%x", digest[:]), nil
}
// Get the finalized value for the "Authorization" header. The signature parameter is the output from SignStringToSign | |||||
func AuthHeaderValue(signature, accessKey string, signedHeaders []string) string { | |||||
return fmt.Sprintf("%s Access=%s, SignedHeaders=%s, Signature=%s", Algorithm, accessKey, strings.Join(signedHeaders, ";"), signature) | |||||
} | |||||
// Signer holds the credentials used to sign requests: Key is written into the
// Authorization header as the access key, and Secret is the HMAC signing key.
type Signer struct {
	Key    string
	Secret string
}
// SignRequest set Authorization header | |||||
func (s *Signer) Sign(r *http.Request) error { | |||||
var t time.Time | |||||
var err error | |||||
var dt string | |||||
if dt = r.Header.Get(HeaderXDate); dt != "" { | |||||
t, err = time.Parse(BasicDateFormat, dt) | |||||
} | |||||
if err != nil || dt == "" { | |||||
t = time.Now() | |||||
r.Header.Set(HeaderXDate, t.UTC().Format(BasicDateFormat)) | |||||
} | |||||
signedHeaders := SignedHeaders(r) | |||||
canonicalRequest, err := CanonicalRequest(r, signedHeaders) | |||||
if err != nil { | |||||
return err | |||||
} | |||||
stringToSign, err := StringToSign(canonicalRequest, t) | |||||
if err != nil { | |||||
return err | |||||
} | |||||
signature, err := SignStringToSign(stringToSign, []byte(s.Secret)) | |||||
if err != nil { | |||||
return err | |||||
} | |||||
authValue := AuthHeaderValue(signature, s.Key, signedHeaders) | |||||
r.Header.Set(HeaderAuthorization, authValue) | |||||
return nil | |||||
} |
@@ -75,6 +75,26 @@ type C2NetSqInfos struct { | |||||
C2NetSqInfo []*C2NetSequenceInfo `json:"sequence"` | C2NetSqInfo []*C2NetSequenceInfo `json:"sequence"` | ||||
} | } | ||||
type (
	// StFlavorInfos is the decoded form of a flavor list: a JSON object
	// whose "flavor_info" member is an array of flavor entries.
	StFlavorInfos struct {
		FlavorInfo []*FlavorInfo `json:"flavor_info"`
	}

	// FlavorInfo is a single flavor entry with an integer id, a value and
	// a description.
	FlavorInfo struct {
		Id    int    `json:"id"`
		Value string `json:"value"`
		Desc  string `json:"desc"`
	}
)
type (
	// StImageInfosModelArts is the decoded form of a notebook image list: a
	// JSON object whose "image_info" member is an array of image entries.
	StImageInfosModelArts struct {
		ImageInfo []*ImageInfoModelArts `json:"image_info"`
	}

	// ImageInfoModelArts is a single image entry with a string id, a value
	// and a description.
	ImageInfoModelArts struct {
		Id    string `json:"id"`
		Value string `json:"value"`
		Desc  string `json:"desc"`
	}
)
var ( | var ( | ||||
// AppVer settings | // AppVer settings | ||||
AppVer string | AppVer string | ||||
@@ -535,18 +555,31 @@ var ( | |||||
AllowedOrg string | AllowedOrg string | ||||
ProfileID string | ProfileID string | ||||
PoolInfos string | PoolInfos string | ||||
Flavor string | |||||
FlavorInfos string | |||||
DebugHost string | DebugHost string | ||||
ImageInfos string | ImageInfos string | ||||
Capacity int | Capacity int | ||||
MaxTempQueryTimes int | MaxTempQueryTimes int | ||||
StFlavorInfo *StFlavorInfos | |||||
StImageInfos *StImageInfosModelArts | |||||
//train-job | //train-job | ||||
ResourcePools string | ResourcePools string | ||||
Engines string | Engines string | ||||
EngineVersions string | EngineVersions string | ||||
FlavorInfos string | |||||
TrainJobFLAVORINFOS string | TrainJobFLAVORINFOS string | ||||
ModelArtsSpecialPools string | ModelArtsSpecialPools string | ||||
ModelArtsMultiNode string | |||||
// modelarts-cd config | |||||
ModelartsCD = struct { | |||||
Enabled bool | |||||
EndPoint string | |||||
ProjectID string | |||||
AccessKey string | |||||
SecretKey string | |||||
ImageInfos string | |||||
FlavorInfos string | |||||
}{} | |||||
//grampus config | //grampus config | ||||
Grampus = struct { | Grampus = struct { | ||||
@@ -1422,9 +1455,8 @@ func NewContext() { | |||||
AllowedOrg = sec.Key("ORGANIZATION").MustString("") | AllowedOrg = sec.Key("ORGANIZATION").MustString("") | ||||
ProfileID = sec.Key("PROFILE_ID").MustString("") | ProfileID = sec.Key("PROFILE_ID").MustString("") | ||||
PoolInfos = sec.Key("POOL_INFOS").MustString("") | PoolInfos = sec.Key("POOL_INFOS").MustString("") | ||||
Flavor = sec.Key("FLAVOR").MustString("") | |||||
ImageInfos = sec.Key("IMAGE_INFOS").MustString("") | ImageInfos = sec.Key("IMAGE_INFOS").MustString("") | ||||
Capacity = sec.Key("IMAGE_INFOS").MustInt(100) | |||||
Capacity = sec.Key("CAPACITY").MustInt(100) | |||||
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30) | MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30) | ||||
ResourcePools = sec.Key("Resource_Pools").MustString("") | ResourcePools = sec.Key("Resource_Pools").MustString("") | ||||
Engines = sec.Key("Engines").MustString("") | Engines = sec.Key("Engines").MustString("") | ||||
@@ -1432,6 +1464,7 @@ func NewContext() { | |||||
FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("") | FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("") | ||||
TrainJobFLAVORINFOS = sec.Key("TrainJob_FLAVOR_INFOS").MustString("") | TrainJobFLAVORINFOS = sec.Key("TrainJob_FLAVOR_INFOS").MustString("") | ||||
ModelArtsSpecialPools = sec.Key("SPECIAL_POOL").MustString("") | ModelArtsSpecialPools = sec.Key("SPECIAL_POOL").MustString("") | ||||
ModelArtsMultiNode=sec.Key("MULTI_NODE").MustString("") | |||||
sec = Cfg.Section("elk") | sec = Cfg.Section("elk") | ||||
ElkUrl = sec.Key("ELKURL").MustString("") | ElkUrl = sec.Key("ELKURL").MustString("") | ||||
@@ -1472,8 +1505,8 @@ func NewContext() { | |||||
Course.OrgName = sec.Key("org_name").MustString("") | Course.OrgName = sec.Key("org_name").MustString("") | ||||
Course.TeamName = sec.Key("team_name").MustString("") | Course.TeamName = sec.Key("team_name").MustString("") | ||||
GetGrampusConfig() | |||||
getGrampusConfig() | |||||
getModelartsCDConfig() | |||||
getModelConvertConfig() | getModelConvertConfig() | ||||
} | } | ||||
@@ -1496,7 +1529,22 @@ func getModelConvertConfig() { | |||||
ModelConvert.NPU_TENSORFLOW_IMAGE_ID = sec.Key("NPU_TENSORFLOW_IMAGE_ID").MustInt(35) | ModelConvert.NPU_TENSORFLOW_IMAGE_ID = sec.Key("NPU_TENSORFLOW_IMAGE_ID").MustInt(35) | ||||
} | } | ||||
func GetGrampusConfig() { | |||||
func getModelartsCDConfig() { | |||||
sec := Cfg.Section("modelarts-cd") | |||||
ModelartsCD.Enabled = sec.Key("ENABLED").MustBool(false) | |||||
ModelartsCD.EndPoint = sec.Key("ENDPOINT").MustString("https://modelarts.cn-southwest-228.cdzs.cn") | |||||
ModelartsCD.ProjectID = sec.Key("PROJECT_ID").MustString("") | |||||
ModelartsCD.AccessKey = sec.Key("ACCESS_KEY").MustString("") | |||||
ModelartsCD.SecretKey = sec.Key("SECRET_KEY").MustString("") | |||||
ModelartsCD.ImageInfos = sec.Key("IMAGE_INFOS").MustString("") | |||||
ModelartsCD.FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("") | |||||
getNotebookImageInfos() | |||||
getNotebookFlavorInfos() | |||||
} | |||||
func getGrampusConfig() { | |||||
sec := Cfg.Section("grampus") | sec := Cfg.Section("grampus") | ||||
Grampus.Env = sec.Key("ENV").MustString("TEST") | Grampus.Env = sec.Key("ENV").MustString("TEST") | ||||
@@ -1630,6 +1678,26 @@ func ensureLFSDirectory() { | |||||
} | } | ||||
} | } | ||||
func getNotebookImageInfos() { | |||||
if StImageInfos == nil { | |||||
if ModelartsCD.Enabled { | |||||
json.Unmarshal([]byte(ModelartsCD.ImageInfos), &StImageInfos) | |||||
} else { | |||||
json.Unmarshal([]byte(ImageInfos), &StImageInfos) | |||||
} | |||||
} | |||||
} | |||||
func getNotebookFlavorInfos() { | |||||
if StFlavorInfo == nil { | |||||
if ModelartsCD.Enabled { | |||||
json.Unmarshal([]byte(ModelartsCD.FlavorInfos), &StFlavorInfo) | |||||
} else { | |||||
json.Unmarshal([]byte(FlavorInfos), &StFlavorInfo) | |||||
} | |||||
} | |||||
} | |||||
// NewServices initializes the services | // NewServices initializes the services | ||||
func NewServices() { | func NewServices() { | ||||
InitDBConfig() | InitDBConfig() | ||||
@@ -1079,6 +1079,7 @@ balance.total_view = Total Balance | |||||
balance.available = Available Balance: | balance.available = Available Balance: | ||||
cloudbrain1 = cloudbrain1 | cloudbrain1 = cloudbrain1 | ||||
cloudbrain2 = cloudbrain2 | cloudbrain2 = cloudbrain2 | ||||
cdCenter = cd_ai_center | |||||
cloudbrain_selection = select cloudbrain | cloudbrain_selection = select cloudbrain | ||||
cloudbrain_platform_selection = Select the cloudbrain platform you want to use: | cloudbrain_platform_selection = Select the cloudbrain platform you want to use: | ||||
confirm_choice = Confirm | confirm_choice = Confirm | ||||
@@ -1213,6 +1214,7 @@ modelarts.infer_job.select_model = Select Model | |||||
modelarts.infer_job.boot_file_helper=The startup file is the entry file for your program execution and must end in .py, such as inference.py, main.py, example/inference.py, case/main.py. | modelarts.infer_job.boot_file_helper=The startup file is the entry file for your program execution and must end in .py, such as inference.py, main.py, example/inference.py, case/main.py. | ||||
modelarts.infer_job.tooltip = The model has been deleted and cannot be viewed. | modelarts.infer_job.tooltip = The model has been deleted and cannot be viewed. | ||||
modelarts.download_log=Download log file | modelarts.download_log=Download log file | ||||
modelarts.no_node_right = The value of 'Amount of Compute Node' is wrong, you have no right to use the current value of 'Amount of Compute Node'. | |||||
debug_task_not_created = Debug task has not been created | debug_task_not_created = Debug task has not been created | ||||
@@ -1080,6 +1080,7 @@ balance.total_view=余额总览 | |||||
balance.available=可用余额: | balance.available=可用余额: | ||||
cloudbrain1=云脑1 | cloudbrain1=云脑1 | ||||
cloudbrain2=云脑2 | cloudbrain2=云脑2 | ||||
cdCenter=成都智算中心 | |||||
intelligent_net=智算网络 | intelligent_net=智算网络 | ||||
cloudbrain_selection=云脑选择 | cloudbrain_selection=云脑选择 | ||||
cloudbrain_platform_selection=选择您准备使用的云脑平台: | cloudbrain_platform_selection=选择您准备使用的云脑平台: | ||||
@@ -1226,6 +1227,7 @@ modelarts.infer_job.select_model = 选择模型 | |||||
modelarts.infer_job.boot_file_helper=启动文件是您程序执行的入口文件,必须是以.py结尾的文件。比如inference.py、main.py、example/inference.py、case/main.py。 | modelarts.infer_job.boot_file_helper=启动文件是您程序执行的入口文件,必须是以.py结尾的文件。比如inference.py、main.py、example/inference.py、case/main.py。 | ||||
modelarts.infer_job.tooltip = 该模型已删除,无法查看。 | modelarts.infer_job.tooltip = 该模型已删除,无法查看。 | ||||
modelarts.download_log=下载日志文件 | modelarts.download_log=下载日志文件 | ||||
modelarts.no_node_right = 计算节点数的值配置错误,您没有权限使用当前配置的计算节点数。 | |||||
debug_task_not_created = 未创建过调试任务 | debug_task_not_created = 未创建过调试任务 | ||||
@@ -405,46 +405,83 @@ func CloudbrainDownloadLogFile(ctx *context.Context) { | |||||
func CloudbrainGetLog(ctx *context.Context) { | func CloudbrainGetLog(ctx *context.Context) { | ||||
ID := ctx.Params(":id") | ID := ctx.Params(":id") | ||||
startLine := ctx.QueryInt("base_line") | |||||
lines := ctx.QueryInt("lines") | |||||
endLine := startLine + lines | |||||
order := ctx.Query("order") | |||||
if order == "asc" { | |||||
endLine = startLine | |||||
startLine = endLine - lines | |||||
if startLine < 0 { | |||||
startLine = 0 | |||||
} | |||||
} | |||||
job, err := models.GetCloudbrainByID(ID) | job, err := models.GetCloudbrainByID(ID) | ||||
if err != nil { | if err != nil { | ||||
log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"]) | log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"]) | ||||
ctx.ServerError(err.Error(), err) | ctx.ServerError(err.Error(), err) | ||||
return | return | ||||
} | } | ||||
result := getLogFromModelDir(job.JobName, startLine, endLine) | |||||
if result == nil { | |||||
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"]) | |||||
ctx.ServerError(err.Error(), err) | |||||
return | |||||
lines := ctx.QueryInt("lines") | |||||
baseLine := ctx.Query("base_line") | |||||
order := ctx.Query("order") | |||||
var result map[string]interface{} | |||||
resultPath := "/model" | |||||
if job.JobType == string(models.JobTypeInference) { | |||||
resultPath = "/result" | |||||
} | |||||
if baseLine == "" && order == "desc" { | |||||
result = getLastLogFromModelDir(job.JobName, lines, resultPath) | |||||
} else { | |||||
startLine := ctx.QueryInt("base_line") | |||||
endLine := startLine + lines | |||||
if order == "asc" { | |||||
if baseLine == "" { | |||||
startLine = 0 | |||||
endLine = lines | |||||
} else { | |||||
endLine = startLine | |||||
startLine = endLine - lines | |||||
if startLine < 0 { | |||||
startLine = 0 | |||||
} | |||||
} | |||||
} | |||||
result = getLogFromModelDir(job.JobName, startLine, endLine, resultPath) | |||||
if result == nil { | |||||
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"]) | |||||
ctx.ServerError(err.Error(), err) | |||||
return | |||||
} | |||||
} | } | ||||
re := map[string]interface{}{ | re := map[string]interface{}{ | ||||
"JobID": ID, | "JobID": ID, | ||||
"LogFileName": result["FileName"], | "LogFileName": result["FileName"], | ||||
"StartLine": startLine, | |||||
"EndLine": result["endLine"], | |||||
"StartLine": result["StartLine"], | |||||
"EndLine": result["EndLine"], | |||||
"Content": result["Content"], | "Content": result["Content"], | ||||
"Lines": result["lines"], | |||||
"Lines": result["Lines"], | |||||
"CanLogDownload": result["FileName"] != "", | "CanLogDownload": result["FileName"] != "", | ||||
} | } | ||||
//result := CloudbrainGetLogByJobId(job.JobID, job.JobName) | //result := CloudbrainGetLogByJobId(job.JobID, job.JobName) | ||||
ctx.JSON(http.StatusOK, re) | ctx.JSON(http.StatusOK, re) | ||||
} | } | ||||
func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]interface{} { | |||||
prefix := "/" + setting.CBCodePathPrefix + jobName + "/model" | |||||
func getAllLineFromFile(path string) int { | |||||
count := 0 | |||||
reader, err := os.Open(path) | |||||
defer reader.Close() | |||||
if err == nil { | |||||
r := bufio.NewReader(reader) | |||||
for { | |||||
_, error := r.ReadString('\n') | |||||
if error == io.EOF { | |||||
log.Info("read file completed.") | |||||
break | |||||
} | |||||
if error != nil { | |||||
log.Info("read file error." + error.Error()) | |||||
break | |||||
} | |||||
count = count + 1 | |||||
} | |||||
} else { | |||||
log.Info("error:" + err.Error()) | |||||
} | |||||
return count | |||||
} | |||||
func getLastLogFromModelDir(jobName string, lines int, resultPath string) map[string]interface{} { | |||||
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath | |||||
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "") | files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "") | ||||
if err != nil { | if err != nil { | ||||
log.Error("query cloudbrain model failed: %v", err) | log.Error("query cloudbrain model failed: %v", err) | ||||
@@ -454,11 +491,81 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i | |||||
re := "" | re := "" | ||||
fileName := "" | fileName := "" | ||||
count := 0 | count := 0 | ||||
allLines := 0 | |||||
startLine := 0 | |||||
for _, file := range files { | |||||
if strings.HasSuffix(file.FileName, "log.txt") { | |||||
fileName = file.FileName | |||||
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName) | |||||
allLines = getAllLineFromFile(path) | |||||
startLine = allLines - lines | |||||
if startLine < 0 { | |||||
startLine = 0 | |||||
} | |||||
count = allLines - startLine | |||||
log.Info("path=" + path) | |||||
reader, err := os.Open(path) | |||||
defer reader.Close() | |||||
if err == nil { | |||||
r := bufio.NewReader(reader) | |||||
for i := 0; i < allLines; i++ { | |||||
line, error := r.ReadString('\n') | |||||
if error == io.EOF { | |||||
log.Info("read file completed.") | |||||
break | |||||
} | |||||
if error != nil { | |||||
log.Info("read file error." + error.Error()) | |||||
break | |||||
} | |||||
if error == nil { | |||||
if i >= startLine { | |||||
re = re + line | |||||
} | |||||
} | |||||
} | |||||
} else { | |||||
log.Info("error:" + err.Error()) | |||||
} | |||||
break | |||||
} | |||||
} | |||||
return map[string]interface{}{ | |||||
"JobName": jobName, | |||||
"Content": re, | |||||
"FileName": fileName, | |||||
"Lines": count, | |||||
"EndLine": allLines, | |||||
"StartLine": startLine, | |||||
} | |||||
} | |||||
func getLogFromModelDir(jobName string, startLine int, endLine int, resultPath string) map[string]interface{} { | |||||
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath | |||||
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "") | |||||
if err != nil { | |||||
log.Error("query cloudbrain model failed: %v", err) | |||||
return nil | |||||
} | |||||
if startLine == endLine { | |||||
return map[string]interface{}{ | |||||
"JobName": jobName, | |||||
"Content": "", | |||||
"FileName": "", | |||||
"Lines": 0, | |||||
"EndLine": startLine, | |||||
"StartLine": startLine, | |||||
} | |||||
} | |||||
re := "" | |||||
fileName := "" | |||||
count := 0 | |||||
fileEndLine := endLine | fileEndLine := endLine | ||||
for _, file := range files { | for _, file := range files { | ||||
if strings.HasSuffix(file.FileName, "log.txt") { | if strings.HasSuffix(file.FileName, "log.txt") { | ||||
fileName = file.FileName | fileName = file.FileName | ||||
path := storage.GetMinioPath(jobName+"/model/", file.FileName) | |||||
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName) | |||||
log.Info("path=" + path) | log.Info("path=" + path) | ||||
reader, err := os.Open(path) | reader, err := os.Open(path) | ||||
defer reader.Close() | defer reader.Close() | ||||
@@ -467,7 +574,6 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i | |||||
for i := 0; i < endLine; i++ { | for i := 0; i < endLine; i++ { | ||||
line, error := r.ReadString('\n') | line, error := r.ReadString('\n') | ||||
log.Info("line=" + line) | log.Info("line=" + line) | ||||
fileEndLine = i | |||||
if error == io.EOF { | if error == io.EOF { | ||||
log.Info("read file completed.") | log.Info("read file completed.") | ||||
break | break | ||||
@@ -478,11 +584,13 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i | |||||
} | } | ||||
if error == nil { | if error == nil { | ||||
if i >= startLine { | if i >= startLine { | ||||
fileEndLine = i | |||||
re = re + line | re = re + line | ||||
count++ | count++ | ||||
} | } | ||||
} | } | ||||
} | } | ||||
fileEndLine = fileEndLine + 1 | |||||
} else { | } else { | ||||
log.Info("error:" + err.Error()) | log.Info("error:" + err.Error()) | ||||
} | } | ||||
@@ -491,11 +599,12 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i | |||||
} | } | ||||
return map[string]interface{}{ | return map[string]interface{}{ | ||||
"JobName": jobName, | |||||
"Content": re, | |||||
"FileName": fileName, | |||||
"lines": count, | |||||
"endLine": fileEndLine, | |||||
"JobName": jobName, | |||||
"Content": re, | |||||
"FileName": fileName, | |||||
"Lines": count, | |||||
"EndLine": fileEndLine, | |||||
"StartLine": startLine, | |||||
} | } | ||||
} | } | ||||
@@ -2335,7 +2335,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||||
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, &form) | ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, &form) | ||||
return | return | ||||
} | } | ||||
log.Info("Command=" + command) | |||||
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/")) | |||||
req := cloudbrain.GenerateCloudBrainTaskReq{ | req := cloudbrain.GenerateCloudBrainTaskReq{ | ||||
Ctx: ctx, | Ctx: ctx, | ||||
DisplayJobName: displayJobName, | DisplayJobName: displayJobName, | ||||
@@ -2471,6 +2472,8 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||||
ctx.RenderWithErr("Resource specification not available", tpl, &form) | ctx.RenderWithErr("Resource specification not available", tpl, &form) | ||||
return | return | ||||
} | } | ||||
log.Info("Command=" + command) | |||||
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/")) | |||||
req := cloudbrain.GenerateCloudBrainTaskReq{ | req := cloudbrain.GenerateCloudBrainTaskReq{ | ||||
Ctx: ctx, | Ctx: ctx, | ||||
DisplayJobName: displayJobName, | DisplayJobName: displayJobName, | ||||
@@ -2598,7 +2601,7 @@ func getInferenceJobCommand(form auth.CreateCloudBrainInferencForm) (string, err | |||||
param += " --modelname" + "=" + form.CkptName | param += " --modelname" + "=" + form.CkptName | ||||
command += "python /code/" + bootFile + param + " > " + cloudbrain.ResultPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile | |||||
command += "python /code/" + bootFile + param + " | tee " + cloudbrain.ResultPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile | |||||
return command, nil | return command, nil | ||||
} | } | ||||
@@ -2627,7 +2630,7 @@ func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) { | |||||
} | } | ||||
} | } | ||||
command += "python /code/" + bootFile + param + " | tee " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile | |||||
command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile | |||||
return command, nil | return command, nil | ||||
} | } | ||||
@@ -2672,6 +2675,8 @@ func GetCloudbrainAiCenter(task models.Cloudbrain, ctx *context.Context) string | |||||
return ctx.Tr("repo.cloudbrain1") | return ctx.Tr("repo.cloudbrain1") | ||||
} else if task.Type == models.TypeCloudBrainTwo { | } else if task.Type == models.TypeCloudBrainTwo { | ||||
return ctx.Tr("repo.cloudbrain2") | return ctx.Tr("repo.cloudbrain2") | ||||
} else if task.Type == models.TypeCDCenter { | |||||
return ctx.Tr("repo.cdCenter") | |||||
} else if task.Type == models.TypeC2Net { | } else if task.Type == models.TypeC2Net { | ||||
return getCutStringAiCenterByAiCenter(task.AiCenter) | return getCutStringAiCenterByAiCenter(task.AiCenter) | ||||
} | } | ||||
@@ -2686,7 +2691,7 @@ func getCutStringAiCenterByAiCenter(aiCenter string) string { | |||||
} | } | ||||
func GetCloudbrainCluster(task models.Cloudbrain, ctx *context.Context) string { | func GetCloudbrainCluster(task models.Cloudbrain, ctx *context.Context) string { | ||||
if task.Type == models.TypeCloudBrainOne || task.Type == models.TypeCloudBrainTwo { | |||||
if task.Type == models.TypeCloudBrainOne || task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter { | |||||
return ctx.Tr("cloudbrain.resource_cluster_openi") | return ctx.Tr("cloudbrain.resource_cluster_openi") | ||||
} else if task.Type == models.TypeC2Net { | } else if task.Type == models.TypeC2Net { | ||||
return ctx.Tr("cloudbrain.resource_cluster_c2net") | return ctx.Tr("cloudbrain.resource_cluster_c2net") | ||||
@@ -2773,10 +2778,10 @@ func GetCloudbrainFlavorName(task models.Cloudbrain) (string, error) { | |||||
return CloudbrainOneFlavorName, nil | return CloudbrainOneFlavorName, nil | ||||
} | } | ||||
} | } | ||||
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeC2Net) && task.FlavorName != "" { | |||||
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeC2Net || task.Type == models.TypeCDCenter) && task.FlavorName != "" { | |||||
replaceFlavorName := strings.ReplaceAll(task.FlavorName, ":", ":") | replaceFlavorName := strings.ReplaceAll(task.FlavorName, ":", ":") | ||||
return replaceFlavorName, nil | return replaceFlavorName, nil | ||||
} else if task.Type == models.TypeCloudBrainTwo && task.FlavorName == "" && task.FlavorCode != "" { | |||||
} else if (task.Type == models.TypeCloudBrainTwo || task.Type == models.TypeCDCenter) && task.FlavorName == "" && task.FlavorCode != "" { | |||||
cloudbrainTwoFlavorName := getFlavorNameByFlavorCode(task.FlavorCode) | cloudbrainTwoFlavorName := getFlavorNameByFlavorCode(task.FlavorCode) | ||||
return cloudbrainTwoFlavorName, nil | return cloudbrainTwoFlavorName, nil | ||||
} else if task.Type == models.TypeCloudBrainTwo && task.JobType == string(models.JobTypeDebug) && task.FlavorName == "" && task.FlavorCode == "" { | } else if task.Type == models.TypeCloudBrainTwo && task.JobType == string(models.JobTypeDebug) && task.FlavorName == "" && task.FlavorCode == "" { | ||||
@@ -2,6 +2,7 @@ package repo | |||||
import ( | import ( | ||||
"archive/zip" | "archive/zip" | ||||
"code.gitea.io/gitea/modules/modelarts_cd" | |||||
"code.gitea.io/gitea/services/cloudbrain/resource" | "code.gitea.io/gitea/services/cloudbrain/resource" | ||||
"encoding/json" | "encoding/json" | ||||
"errors" | "errors" | ||||
@@ -60,18 +61,11 @@ func DebugJobIndex(ctx *context.Context) { | |||||
if page <= 0 { | if page <= 0 { | ||||
page = 1 | page = 1 | ||||
} | } | ||||
typeCloudBrain := models.TypeCloudBrainAll | |||||
jobTypeNot := false | jobTypeNot := false | ||||
if listType == models.GPUResource { | |||||
typeCloudBrain = models.TypeCloudBrainOne | |||||
} else if listType == models.NPUResource { | |||||
typeCloudBrain = models.TypeCloudBrainTwo | |||||
} else if listType == models.AllResource { | |||||
typeCloudBrain = models.TypeCloudBrainAll | |||||
} else { | |||||
log.Error("listType(%s) error", listType) | |||||
ctx.ServerError("listType error", errors.New("listType error")) | |||||
return | |||||
var computeResource string | |||||
if listType != models.AllResource { | |||||
computeResource = listType | |||||
} | } | ||||
var jobTypes []string | var jobTypes []string | ||||
@@ -81,10 +75,11 @@ func DebugJobIndex(ctx *context.Context) { | |||||
Page: page, | Page: page, | ||||
PageSize: setting.UI.IssuePagingNum, | PageSize: setting.UI.IssuePagingNum, | ||||
}, | }, | ||||
RepoID: repo.ID, | |||||
Type: typeCloudBrain, | |||||
JobTypeNot: jobTypeNot, | |||||
JobTypes: jobTypes, | |||||
RepoID: repo.ID, | |||||
ComputeResource: computeResource, | |||||
Type: models.TypeCloudBrainAll, | |||||
JobTypeNot: jobTypeNot, | |||||
JobTypes: jobTypes, | |||||
}) | }) | ||||
if err != nil { | if err != nil { | ||||
ctx.ServerError("Get debugjob faild:", err) | ctx.ServerError("Get debugjob faild:", err) | ||||
@@ -134,6 +129,7 @@ func notebookNewDataPrepare(ctx *context.Context) error { | |||||
return err | return err | ||||
} | } | ||||
ctx.Data["attachments"] = attachs | ctx.Data["attachments"] = attachs | ||||
ctx.Data["images"] = setting.StImageInfos.ImageInfo | |||||
if modelarts.ImageInfos == nil { | if modelarts.ImageInfos == nil { | ||||
json.Unmarshal([]byte(setting.ImageInfos), &modelarts.ImageInfos) | json.Unmarshal([]byte(setting.ImageInfos), &modelarts.ImageInfos) | ||||
@@ -256,6 +252,13 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm | |||||
return | return | ||||
} | } | ||||
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec) | err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec) | ||||
if setting.ModelartsCD.Enabled { | |||||
err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, uuid, description, flavor, imageId) | |||||
} else { | |||||
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId) | |||||
} | |||||
if err != nil { | if err != nil { | ||||
log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"]) | log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"]) | ||||
notebookNewDataPrepare(ctx) | notebookNewDataPrepare(ctx) | ||||
@@ -385,36 +388,16 @@ func setShowSpecBySpecialPoolConfig(ctx *context.Context, findSpec bool, task *m | |||||
} | } | ||||
} | } | ||||
func NotebookDebug(ctx *context.Context) { | |||||
var jobID = ctx.Params(":jobid") | |||||
result, err := modelarts.GetJob(jobID) | |||||
if err != nil { | |||||
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil) | |||||
return | |||||
} | |||||
res, err := modelarts.GetJobToken(jobID) | |||||
if err != nil { | |||||
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil) | |||||
return | |||||
} | |||||
urls := strings.Split(result.Spec.Annotations.Url, "/") | |||||
urlPrefix := result.Spec.Annotations.TargetDomain | |||||
for i, url := range urls { | |||||
if i > 2 { | |||||
urlPrefix += "/" + url | |||||
} | |||||
} | |||||
debugUrl := urlPrefix + "?token=" + res.Token | |||||
ctx.Redirect(debugUrl) | |||||
} | |||||
func NotebookDebug2(ctx *context.Context) { | func NotebookDebug2(ctx *context.Context) { | ||||
var err error | |||||
var result *models.GetNotebook2Result | |||||
task := ctx.Cloudbrain | task := ctx.Cloudbrain | ||||
result, err := modelarts.GetNotebook2(task.JobID) | |||||
if task.Type == models.TypeCloudBrainTwo { | |||||
result, err = modelarts.GetNotebook2(task.JobID) | |||||
} else if task.Type == models.TypeCDCenter { | |||||
result, err = modelarts_cd.GetNotebook(task.JobID) | |||||
} | |||||
if err != nil { | if err != nil { | ||||
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil) | ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil) | ||||
return | return | ||||
@@ -478,7 +461,13 @@ func NotebookRestart(ctx *context.Context) { | |||||
Action: models.ActionStart, | Action: models.ActionStart, | ||||
} | } | ||||
res, err := modelarts.ManageNotebook2(task.JobID, param) | |||||
var res *models.NotebookActionResult | |||||
if task.Type == models.TypeCloudBrainTwo { | |||||
res, err = modelarts.ManageNotebook2(task.JobID, param) | |||||
} else if task.Type == models.TypeCDCenter { | |||||
res, err = modelarts_cd.ManageNotebook(task.JobID, param) | |||||
} | |||||
if err != nil { | if err != nil { | ||||
log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"]) | log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"]) | ||||
/* 暂不处理再次调试502的场景,详情见方案 | /* 暂不处理再次调试502的场景,详情见方案 | ||||
@@ -563,7 +552,14 @@ func NotebookStop(ctx *context.Context) { | |||||
Action: models.ActionStop, | Action: models.ActionStop, | ||||
} | } | ||||
res, err := modelarts.ManageNotebook2(task.JobID, param) | |||||
var err error | |||||
var res *models.NotebookActionResult | |||||
if task.Type == models.TypeCloudBrainTwo { | |||||
res, err = modelarts.ManageNotebook2(task.JobID, param) | |||||
} else if task.Type == models.TypeCDCenter { | |||||
res, err = modelarts_cd.ManageNotebook(task.JobID, param) | |||||
} | |||||
if err != nil { | if err != nil { | ||||
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | ||||
resultCode = "-1" | resultCode = "-1" | ||||
@@ -613,7 +609,13 @@ func NotebookDel(ctx *context.Context) { | |||||
return | return | ||||
} | } | ||||
_, err := modelarts.DelNotebook2(task.JobID) | |||||
var err error | |||||
if task.Type == models.TypeCloudBrainTwo { | |||||
_, err = modelarts.DelNotebook2(task.JobID) | |||||
} else if task.Type == models.TypeCDCenter { | |||||
_, err = modelarts_cd.DelNotebook(task.JobID) | |||||
} | |||||
if err != nil { | if err != nil { | ||||
log.Error("DelNotebook2(%s) failed:%v", task.JobName, err.Error()) | log.Error("DelNotebook2(%s) failed:%v", task.JobName, err.Error()) | ||||
if strings.Contains(err.Error(), modelarts.NotebookNotFound) || strings.Contains(err.Error(), modelarts.NotebookNoPermission) || strings.Contains(err.Error(), modelarts.NotebookInvalid) { | if strings.Contains(err.Error(), modelarts.NotebookNotFound) || strings.Contains(err.Error(), modelarts.NotebookNoPermission) || strings.Contains(err.Error(), modelarts.NotebookInvalid) { | ||||
@@ -764,6 +766,8 @@ func trainJobNewDataPrepare(ctx *context.Context) error { | |||||
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | ||||
ctx.Data["WaitCount"] = waitCount | ctx.Data["WaitCount"] = waitCount | ||||
setMultiNodeIfConfigureMatch(ctx) | |||||
return nil | return nil | ||||
} | } | ||||
@@ -777,6 +781,18 @@ func prepareCloudbrainTwoTrainSpecs(ctx *context.Context) { | |||||
ctx.Data["Specs"] = noteBookSpecs | ctx.Data["Specs"] = noteBookSpecs | ||||
} | } | ||||
func setMultiNodeIfConfigureMatch(ctx *context.Context) { | |||||
modelarts.InitMultiNode() | |||||
if modelarts.MultiNodeConfig != nil { | |||||
for _, info := range modelarts.MultiNodeConfig.Info { | |||||
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, ctx.User.ID); isInOrg { | |||||
ctx.Data["WorkNode"] = info.Node | |||||
break | |||||
} | |||||
} | |||||
} | |||||
} | |||||
func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) { | func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) { | ||||
modelarts.InitSpecialPool() | modelarts.InitSpecialPool() | ||||
@@ -885,6 +901,7 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts | |||||
ctx.Data["datasetType"] = models.TypeCloudBrainTwo | ctx.Data["datasetType"] = models.TypeCloudBrainTwo | ||||
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | ||||
ctx.Data["WaitCount"] = waitCount | ctx.Data["WaitCount"] = waitCount | ||||
setMultiNodeIfConfigureMatch(ctx) | |||||
return nil | return nil | ||||
} | } | ||||
@@ -1106,6 +1123,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||||
VersionCount := modelarts.VersionCountOne | VersionCount := modelarts.VersionCountOne | ||||
EngineName := form.EngineName | EngineName := form.EngineName | ||||
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber) | |||||
if errStr != "" { | |||||
trainJobErrorNewDataPrepare(ctx, form) | |||||
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form) | |||||
return | |||||
} | |||||
count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | ||||
if err != nil { | if err != nil { | ||||
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | ||||
@@ -1344,6 +1368,48 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||||
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | ||||
} | } | ||||
func checkMultiNode(userId int64, serverNum int) string { | |||||
if serverNum == 1 { | |||||
return "" | |||||
} | |||||
modelarts.InitMultiNode() | |||||
var isServerNumValid = false | |||||
if modelarts.MultiNodeConfig != nil { | |||||
for _, info := range modelarts.MultiNodeConfig.Info { | |||||
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg { | |||||
if isInNodes(info.Node, serverNum) { | |||||
isServerNumValid = true | |||||
break | |||||
} | |||||
} | |||||
} | |||||
} | |||||
if isServerNumValid { | |||||
return "" | |||||
} else { | |||||
return "repo.modelarts.no_node_right" | |||||
} | |||||
} | |||||
// checkInferenceJobMultiNode validates the number of work servers requested
// for an inference job. Only a single server is accepted; userId is not
// consulted by this check.
//
// It returns "" when serverNum is 1; for every other value it returns the i18n
// key "repo.modelarts.no_node_right".
func checkInferenceJobMultiNode(userId int64, serverNum int) string {
	if serverNum != 1 {
		return "repo.modelarts.no_node_right"
	}
	return ""
}
// isInNodes reports whether num is one of the values in nodes.
// A nil or empty slice always yields false.
func isInNodes(nodes []int, num int) bool {
	for i := range nodes {
		if nodes[i] == num {
			return true
		}
	}
	return false
}
func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, string) { | func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, string) { | ||||
userImageUrl := "" | userImageUrl := "" | ||||
userCommand := "" | userCommand := "" | ||||
@@ -1378,6 +1444,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||||
ctx.Data["PageIsTrainJob"] = true | ctx.Data["PageIsTrainJob"] = true | ||||
var jobID = ctx.Params(":jobid") | var jobID = ctx.Params(":jobid") | ||||
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber) | |||||
if errStr != "" { | |||||
versionErrorDataPrepare(ctx, form) | |||||
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form) | |||||
return | |||||
} | |||||
count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | ||||
if err != nil { | if err != nil { | ||||
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | ||||
@@ -1717,10 +1790,6 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error { | |||||
return errors.New("启动文件必须是python文件") | return errors.New("启动文件必须是python文件") | ||||
} | } | ||||
if form.WorkServerNumber > 2 || form.WorkServerNumber < 1 { | |||||
log.Error("the WorkServerNumber(%d) must be in (1,2)", form.WorkServerNumber) | |||||
return errors.New("计算节点数必须在1-2之间") | |||||
} | |||||
if form.BranchName == "" { | if form.BranchName == "" { | ||||
log.Error("the branch must not be null!", form.BranchName) | log.Error("the branch must not be null!", form.BranchName) | ||||
return errors.New("代码分支不能为空!") | return errors.New("代码分支不能为空!") | ||||
@@ -2025,6 +2094,13 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||||
ckptUrl := "/" + form.TrainUrl + form.CkptName | ckptUrl := "/" + form.TrainUrl + form.CkptName | ||||
log.Info("ckpt url:" + ckptUrl) | log.Info("ckpt url:" + ckptUrl) | ||||
errStr := checkInferenceJobMultiNode(ctx.User.ID, form.WorkServerNumber) | |||||
if errStr != "" { | |||||
inferenceJobErrorNewDataPrepare(ctx, form) | |||||
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form) | |||||
return | |||||
} | |||||
count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID) | ||||
if err != nil { | if err != nil { | ||||
log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | ||||
@@ -2252,7 +2328,7 @@ func checkModelArtsSpecialPool(ctx *context.Context, flavorCode string, jobType | |||||
if !isMatchPool { | if !isMatchPool { | ||||
isMatchSpec := false | isMatchSpec := false | ||||
if jobType == string(models.JobTypeDebug) { | if jobType == string(models.JobTypeDebug) { | ||||
for _, flavor := range modelarts.FlavorInfos.FlavorInfo { | |||||
for _, flavor := range setting.StFlavorInfo.FlavorInfo { | |||||
if flavor.Value == flavorCode { | if flavor.Value == flavorCode { | ||||
isMatchSpec = true | isMatchSpec = true | ||||
break | break | ||||
@@ -256,8 +256,9 @@ | |||||
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);"> | <div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);"> | ||||
<a class="active item" | <a class="active item" | ||||
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | ||||
<a class="item" data-tab="second{{$k}}" | |||||
onclick="loadLog({{.VersionName}})">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||||
<a class="item log_bottom" data-tab="second{{$k}}" | |||||
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||||
</div> | </div> | ||||
<div class="ui tab active" data-tab="first{{$k}}"> | <div class="ui tab active" data-tab="first{{$k}}"> | ||||
<div style="padding-top: 10px;"> | <div style="padding-top: 10px;"> | ||||
@@ -528,19 +529,42 @@ | |||||
</div> | </div> | ||||
<div class="ui tab" data-tab="second{{$k}}"> | <div class="ui tab" data-tab="second{{$k}}"> | ||||
<div> | <div> | ||||
<div class="ui message message{{.VersionName}}" style="display: none;"> | |||||
<div id="header"></div> | |||||
</div> | |||||
<div class="ui attached log" id="log{{.VersionName}}" | |||||
style="height: 300px !important; overflow: auto;"> | |||||
<input type="hidden" name="end_line" value> | |||||
<input type="hidden" name="start_line" value> | |||||
<pre id="log_file{{.VersionName}}"></pre> | |||||
</div> | |||||
</div> | |||||
</div> | |||||
<a id="{{.VersionName}}-log-down" | |||||
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}' | |||||
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file"> | |||||
<i class="ri-download-cloud-2-line"></i> | |||||
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span> | |||||
</a> | |||||
</div> | |||||
<div | |||||
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;"> | |||||
<span> | |||||
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;" | |||||
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a> | |||||
</span> | |||||
<span class="log-info-{{.VersionName}}"> | |||||
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;" | |||||
class="log_bottom" data-version="{{.VersionName}}"><i | |||||
class="icon-to-bottom"></i></a> | |||||
</span> | |||||
<div class="ui message message{{.VersionName}}" style="display: none;"> | |||||
<div id="header"></div> | |||||
</div> | |||||
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}" | |||||
style="height: 300px !important; overflow: auto;"> | |||||
<div class="ui inverted active dimmer"> | |||||
<div class="ui loader"></div> | |||||
</div> | |||||
<input type="hidden" name="end_line" value> | |||||
<input type="hidden" name="start_line" value> | |||||
<pre id="log_file{{.VersionName}}"></pre> | |||||
</div> | |||||
</div> | |||||
</div> | |||||
</div> | </div> | ||||
</div> | </div> | ||||
@@ -228,7 +228,7 @@ | |||||
</h4> | </h4> | ||||
{{with .task}} | {{with .task}} | ||||
<div class="ui accordion border-according" id="accordion{{.VersionName}}" | <div class="ui accordion border-according" id="accordion{{.VersionName}}" | ||||
data-repopath="{{$.RepoRelPath}}/cloudbrain/inference-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}"> | |||||
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}"> | |||||
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}"> | <input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}"> | ||||
<div class="active title padding0"> | <div class="active title padding0"> | ||||
<div class="according-panel-heading"> | <div class="according-panel-heading"> | ||||
@@ -264,7 +264,8 @@ | |||||
data-tab="first">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | data-tab="first">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | ||||
<a class="item" data-tab="second" | <a class="item" data-tab="second" | ||||
onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a> | onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a> | ||||
<a class="item log_bottom" data-tab="third" | |||||
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||||
<a class="item load-model-file" data-tab="four" | <a class="item load-model-file" data-tab="four" | ||||
data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/inference-job/{{.JobID}}/result_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/inference-job/{{.JobID}}/result_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | ||||
</div> | </div> | ||||
@@ -524,7 +525,7 @@ | |||||
<div class="ui message message{{.VersionName}}" style="display: none;"> | <div class="ui message message{{.VersionName}}" style="display: none;"> | ||||
<div id="header"></div> | <div id="header"></div> | ||||
</div> | </div> | ||||
<div class="ui attached log" id="log{{.VersionName}}" | |||||
<div class="ui attached" | |||||
style="height: 390px !important; overflow: auto;"> | style="height: 390px !important; overflow: auto;"> | ||||
<input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}"> | <input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}"> | ||||
<input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}"> | <input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}"> | ||||
@@ -537,7 +538,44 @@ | |||||
</div> | </div> | ||||
<div class="ui tab" data-tab="third"> | |||||
<div> | |||||
<a id="{{.VersionName}}-log-down" | |||||
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}' | |||||
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file"> | |||||
<i class="ri-download-cloud-2-line"></i> | |||||
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span> | |||||
</a> | |||||
</div> | |||||
<div | |||||
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;"> | |||||
<span> | |||||
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;" | |||||
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a> | |||||
</span> | |||||
<span class="log-info-{{.VersionName}}"> | |||||
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;" | |||||
class="log_bottom" data-version="{{.VersionName}}"><i | |||||
class="icon-to-bottom"></i></a> | |||||
</span> | |||||
<div class="ui message message{{.VersionName}}" style="display: none;"> | |||||
<div id="header"></div> | |||||
</div> | |||||
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}" | |||||
style="height: 300px !important; overflow: auto;"> | |||||
<div class="ui inverted active dimmer"> | |||||
<div class="ui loader"></div> | |||||
</div> | |||||
<input type="hidden" name="end_line" value> | |||||
<input type="hidden" name="start_line" value> | |||||
<pre id="log_file{{.VersionName}}"></pre> | |||||
</div> | |||||
</div> | |||||
</div> | |||||
<div class="ui tab" data-tab="four"> | <div class="ui tab" data-tab="four"> | ||||
<input type="hidden" name="model{{.VersionName}}" value="-1"> | <input type="hidden" name="model{{.VersionName}}" value="-1"> | ||||
@@ -294,8 +294,24 @@ | |||||
id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1" | id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1" | ||||
readonly> | readonly> | ||||
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select"> | <div class="field" id="trainjob_work_server_num_select" name="work_server_number_select"> | ||||
<select class="ui dropdown width" style='width: 100%;' name="work_server_id"> | |||||
<select class="ui dropdown width" style='width: 100%;' name="work_server_id"> | |||||
{{if .WorkNode}} | |||||
{{range .WorkNode}} | |||||
{{if $.work_server_number}} | |||||
{{if eq . $.work_server_number }} | |||||
<option name="server_id" selected value="{{.}}">{{.}}</option> | |||||
{{else}} | |||||
<option name="server_id" value="{{.}}">{{.}}</option> | |||||
{{end}} | |||||
{{else}} | |||||
<option name="server_id" value="{{.}}">{{.}}</option> | |||||
{{end}} | |||||
{{end}} | |||||
{{else}} | |||||
<option name="server_id" value="1">1</option> | <option name="server_id" value="1">1</option> | ||||
{{end}} | |||||
</select> | </select> | ||||
</div> | </div> | ||||