From 18cafe9890ed436d426ed01e8819096640607c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=8B=E6=B1=9F=E5=A4=A9=E6=9E=A2?= Date: Wed, 22 Dec 2021 11:01:32 +0800 Subject: [PATCH] add tadl --- dubhe-server/README.md | 6 +- .../java/org/dubhe/admin/AdminApplication.java | 4 + .../admin/async/CleanupUserResourcesAsync.java | 68 ++ .../org/dubhe/admin/client/GpuConfigClient.java | 42 + .../admin/client/ResourceNamespaceClient.java | 60 + .../dubhe/admin/client/ResourceQuotaClient.java | 10 +- .../dubhe/admin/client/SystemNamespaceClient.java | 42 + .../org/dubhe/admin/client/SystemNodeClient.java | 45 + .../client/fallback/GpuConfigClientFallback.java | 33 + .../fallback/ResourceNamespaceClientFallback.java | 42 + .../fallback/ResourceQuotaClientFallback.java | 4 +- .../fallback/SystemNamespaceClientFallback.java | 35 + .../client/fallback/SystemNodeClientFallback.java | 37 + .../client/template/GpuConfigTemplateClient.java | 60 + .../admin/client/template/ObtainAccessToken.java | 91 ++ .../template/ResourceQuotaTemplateClient.java | 60 + .../org/dubhe/admin/config/AdminPoolConfig.java | 79 ++ .../org/dubhe/admin/dao/GpuResourceMapper.java | 27 + .../java/org/dubhe/admin/dao/UserConfigMapper.java | 46 +- .../org/dubhe/admin/dao/UserGpuConfigMapper.java | 81 ++ .../main/java/org/dubhe/admin/dao/UserMapper.java | 16 + .../dubhe/admin/domain/dto/AuthCodeQueryDTO.java | 2 +- .../admin/domain/dto/GpuResourceCreateDTO.java | 57 + .../admin/domain/dto/GpuResourceDeleteDTO.java | 40 + .../admin/domain/dto/GpuResourceQueryDTO.java | 45 + .../admin/domain/dto/GpuResourceUpdateDTO.java | 64 + .../org/dubhe/admin/domain/dto/MenuQueryDTO.java | 3 +- .../domain/dto/QueryUserResourceSpecsDTO.java | 65 + .../admin/domain/dto/ResourceSpecsCreateDTO.java | 4 +- .../admin/domain/dto/ResourceSpecsQueryDTO.java | 7 +- .../admin/domain/dto/ResourceSpecsUpdateDTO.java | 4 +- .../admin/domain/dto/UserGpuResourceQueryDTO.java | 46 + .../admin/domain/dto/UserGroupConfigSaveDTO.java | 35 + 
.../admin/domain/dto/UserResourceListDTO.java | 31 + .../admin/domain/dto/UserResourceQueryDTO.java | 41 + .../org/dubhe/admin/domain/entity/GpuResource.java | 63 + .../org/dubhe/admin/domain/entity/UserConfig.java | 10 +- .../dubhe/admin/domain/entity/UserGpuConfig.java | 80 ++ .../dubhe/admin/domain/vo/GpuResourceQueryVO.java | 59 + .../java/org/dubhe/admin/domain/vo/MenuVo.java | 5 +- .../dubhe/admin/domain/vo/UserLimitConfigVO.java | 39 + .../dubhe/admin/domain/vo/UserResourceResVO.java | 91 ++ .../org/dubhe/admin/enums/ResourceTypeEnum.java | 54 + .../java/org/dubhe/admin/enums/StatTypeEnum.java | 50 + .../org/dubhe/admin/event/EmailEventListener.java | 2 +- .../org/dubhe/admin/event/EmailEventPublisher.java | 2 +- .../dubhe/admin/rest/GpuResourceController.java | 89 ++ .../dubhe/admin/rest/ResourceSpecsController.java | 22 +- .../org/dubhe/admin/rest/UserCenterController.java | 1 - .../java/org/dubhe/admin/rest/UserController.java | 43 +- .../org/dubhe/admin/rest/UserGroupController.java | 24 +- .../dubhe/admin/rest/UserResourceController.java | 58 + .../dubhe/admin/service/GpuResourceService.java | 77 ++ .../dubhe/admin/service/ResourceSpecsService.java | 23 +- .../org/dubhe/admin/service/UserGroupService.java | 17 +- .../dubhe/admin/service/UserResourceService.java | 48 + .../java/org/dubhe/admin/service/UserService.java | 35 +- .../admin/service/impl/AuthCodeServiceImpl.java | 4 +- .../admin/service/impl/GpuResourceServiceImpl.java | 223 ++++ .../dubhe/admin/service/impl/MenuServiceImpl.java | 38 +- .../admin/service/impl/PermissionServiceImpl.java | 2 +- .../admin/service/impl/RecycleTaskServiceImpl.java | 4 +- .../service/impl/ResourceSpecsServiceImpl.java | 126 +- .../admin/service/impl/UserGroupServiceImpl.java | 56 +- .../service/impl/UserResourceServiceImpl.java | 314 +++++ .../dubhe/admin/service/impl/UserServiceImpl.java | 392 ++++-- .../admin/src/main/resources/bootstrap.yml | 6 +- .../src/main/resources/mapper/UserConfigMapper.xml | 34 +- 
.../main/resources/mapper/UserGpuConfigMapper.xml | 12 + .../admin/src/main/resources/mapper/UserMapper.xml | 5 + .../org/dubhe/admin/AdminApplicationTests.java | 12 + dubhe-server/auth/src/main/resources/bootstrap.yml | 6 +- .../biz/base/constant/ApplicationNameConst.java | 4 + .../org/dubhe/biz/base/constant/AuthConst.java | 8 +- .../dubhe/biz/base/constant/MagicNumConstant.java | 1 + .../dubhe/biz/base/constant/NumberConstant.java | 2 + .../org/dubhe/biz/base/constant/Permissions.java | 13 + .../dubhe/biz/base/constant/StringConstant.java | 49 +- .../dubhe/biz/base/constant/SymbolConstant.java | 2 + .../org/dubhe/biz/base/constant/UserConstant.java | 15 + .../java/org/dubhe/biz/base/dto/DeleteDTO.java | 38 + .../java/org/dubhe/biz/base/dto/GpuConfigDTO.java | 45 + .../biz/base/dto/ModelOptAlgorithmCreateDTO.java | 3 +- .../org/dubhe/biz/base/dto/NamespaceDeleteDTO.java | 38 + .../org/dubhe/biz/base/dto/PtImageQueryUrlDTO.java | 2 + .../dubhe/biz/base/dto/QueryResourceSpecsDTO.java | 4 +- .../biz/base/dto/QueryUserK8sResourceDTO.java | 121 ++ .../org/dubhe/biz/base/dto/ResourceQuotaDTO.java | 43 + .../org/dubhe/biz/base/dto/SysUserConfigDTO.java | 12 +- .../dubhe/biz/base/dto/SysUserGpuConfigDTO.java | 52 + .../org/dubhe/biz/base/dto/UserConfigSaveDTO.java | 50 + .../main/java/org/dubhe/biz/base/dto/UserDTO.java | 2 + .../org/dubhe/biz/base/dto/UserGpuConfigDTO.java | 46 + .../dubhe/biz/base/enums/BaseErrorCodeEnum.java | 2 +- .../java/org/dubhe/biz/base/enums/BizEnum.java | 5 +- .../java/org/dubhe/biz/base/utils/AesUtil.java | 11 +- .../java/org/dubhe/biz/base/utils/CommandUtil.java | 44 + .../java/org/dubhe/biz/base/utils/DateUtil.java | 21 + .../java/org/dubhe/biz/base/utils/MapUtil.java | 39 + .../java/org/dubhe/biz/base/utils/MathUtils.java | 1 + .../java/org/dubhe/biz/base/utils/PtModelUtil.java | 2 + .../java/org/dubhe/biz/base/utils/RegexUtil.java | 34 +- .../java/org/dubhe/biz/base/utils/ResultUtil.java | 7 + 
.../java/org/dubhe/biz/base/utils/StringUtils.java | 8 + .../java/org/dubhe/biz/base/vo/GpuAllotVO.java | 47 + .../dubhe/biz/base/vo/PtModelBranchQueryVO.java | 5 + .../org/dubhe/biz/base/vo/PtModelInfoQueryVO.java | 11 + .../dubhe/biz/base/vo/QueryResourceSpecsVO.java | 2 +- .../biz/base/vo/QueryUserResourceSpecsVO.java | 104 ++ .../org/dubhe/biz/base/vo/UserAllotResourceVO.java | 50 + .../java/org/dubhe/biz/base/vo/UserAllotVO.java | 51 + .../java/org/dubhe/biz/base/vo/UserConfigVO.java | 43 + .../org/dubhe/biz/base/vo/UserGpuConfigVO.java | 60 + .../java/org/dubhe/biz/db/base/BaseLogQuery.java | 34 + .../java/org/dubhe/biz/db/base/PageQueryBase.java | 1 + .../java/org/dubhe/biz/db/entity/PageResult.java | 50 + .../main/java/org/dubhe/biz/db/utils/PageUtil.java | 4 +- dubhe-server/common-biz/file/pom.xml | 10 + .../biz/file/api/impl/ShellFileStoreApiImpl.java | 6 +- .../main/java/org/dubhe/biz/file/utils/IOUtil.java | 25 + .../java/org/dubhe/biz/file/utils/MinioUtil.java | 32 + .../main/java/org/dubhe/biz/log/enums/LogEnum.java | 4 +- .../common-biz/log/src/main/resources/logback.xml | 2 +- dubhe-server/common-biz/redis/pom.xml | 5 + .../java/org/dubhe/biz/redis/utils/RedisUtils.java | 39 +- .../cloud/authconfig/service/AdminClient.java | 30 + .../authconfig/service/AdminClientFallback.java | 19 +- .../service/impl/OAuth2UserContextServiceImpl.java | 4 +- .../src/main/resources/bootstrap-cloud-ai.yaml | 12 + .../main/resources/bootstrap-cloud-open-dev.yml | 12 + .../main/resources/bootstrap-cloud-opendev.yaml | 12 + .../src/main/resources/bootstrap-cloud-pre.yml | 7 +- .../src/main/resources/bootstrap-prod.yml | 12 + .../org/dubhe/docker/utils/DockerCallbackTool.java | 1 + .../main/java/org/dubhe/harbor/api/HarborApi.java | 17 + .../org/dubhe/harbor/api/impl/HarborApiImpl.java | 13 + .../java/org/dubhe/k8s/api/LogMonitoringApi.java | 27 + .../main/java/org/dubhe/k8s/api/MetricsApi.java | 16 + .../src/main/java/org/dubhe/k8s/api/NodeApi.java | 42 +- 
.../dubhe/k8s/api/PersistentVolumeClaimApi.java | 9 + .../src/main/java/org/dubhe/k8s/api/PodApi.java | 45 + .../java/org/dubhe/k8s/api/ResourceQuotaApi.java | 28 +- .../main/java/org/dubhe/k8s/api/ServiceApi.java | 29 + .../dubhe/k8s/api/impl/DistributeTrainApiImpl.java | 54 +- .../dubhe/k8s/api/impl/DubheDeploymentApiImpl.java | 24 +- .../dubhe/k8s/api/impl/JupyterResourceApiImpl.java | 106 +- .../dubhe/k8s/api/impl/LogMonitoringApiImpl.java | 285 ++++- .../org/dubhe/k8s/api/impl/MetricsApiImpl.java | 86 +- .../org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java | 17 +- .../dubhe/k8s/api/impl/ModelServingApiImpl.java | 19 +- .../org/dubhe/k8s/api/impl/NamespaceApiImpl.java | 32 +- .../java/org/dubhe/k8s/api/impl/NodeApiImpl.java | 213 ++-- .../k8s/api/impl/PersistentVolumeClaimApiImpl.java | 23 +- .../java/org/dubhe/k8s/api/impl/PodApiImpl.java | 170 +++ .../dubhe/k8s/api/impl/ResourceQuotaApiImpl.java | 124 +- .../org/dubhe/k8s/api/impl/ServiceApiImpl.java | 72 ++ .../org/dubhe/k8s/api/impl/TerminalApiImpl.java | 58 +- .../org/dubhe/k8s/api/impl/TrainJobApiImpl.java | 102 +- .../java/org/dubhe/k8s/cache/ResourceCache.java | 6 +- .../main/java/org/dubhe/k8s/config/K8sConfig.java | 19 +- .../org/dubhe/k8s/config/PromethuesConfig.java | 56 + .../org/dubhe/k8s/constant/K8sLabelConstants.java | 17 +- .../org/dubhe/k8s/constant/K8sParamConstants.java | 23 +- .../org/dubhe/k8s/dao/K8sCallbackEventMapper.java | 27 + .../java/org/dubhe/k8s/dao/K8sGpuConfigMapper.java | 48 + .../main/java/org/dubhe/k8s/dao/K8sNodeMapper.java | 28 + .../org/dubhe/k8s/dao/K8sTaskIdentifyMapper.java | 41 + .../org/dubhe/k8s/domain/bo/BaseResourceBo.java | 90 ++ .../java/org/dubhe/k8s/domain/bo/DeploymentBO.java | 30 + .../org/dubhe/k8s/domain/bo/DistributeTrainBO.java | 34 +- .../org/dubhe/k8s/domain/bo/LogMonitoringBO.java | 21 + .../org/dubhe/k8s/domain/bo/ModelServingBO.java | 8 + .../dubhe/k8s/domain/bo/PrometheusMetricBO.java | 29 + .../k8s/domain/bo/PromethusNodeMetricsBo.java | 80 ++ 
.../org/dubhe/k8s/domain/bo/PtJupyterJobBO.java | 44 +- .../dubhe/k8s/domain/bo/PtJupyterResourceBO.java | 16 +- .../domain/bo/PtModelOptimizationDeploymentBO.java | 2 +- .../k8s/domain/bo/PtModelOptimizationJobBO.java | 2 +- .../org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java | 26 +- .../domain/dto/BaseK8sPodCallbackCreateDTO.java | 4 + .../org/dubhe/k8s/domain/dto/K8sGpuConfigDTO.java | 45 + .../java/org/dubhe/k8s/domain/dto/NodeInfoDTO.java | 39 + .../dubhe/k8s/domain/entity/K8sCallbackEvent.java | 62 + .../org/dubhe/k8s/domain/entity/K8sGpuConfig.java | 70 ++ .../java/org/dubhe/k8s/domain/entity/K8sNode.java | 53 + .../dubhe/k8s/domain/entity/K8sTaskIdentify.java | 48 + .../resource/BizContainerLastStateTerminated.java | 41 + .../k8s/domain/resource/BizContainerStatus.java | 7 + .../org/dubhe/k8s/domain/resource/BizService.java | 15 + .../java/org/dubhe/k8s/domain/vo/GpuUsageVO.java | 4 +- .../java/org/dubhe/k8s/domain/vo/K8sEventVO.java | 50 + .../k8s/domain/vo/K8sResourceEventResultVO.java | 44 + .../org/dubhe/k8s/domain/vo/LogMonitoringVO.java | 2 +- .../k8s/enums/BusinessLabelServiceNameEnum.java | 30 +- .../java/org/dubhe/k8s/enums/K8sEventTypeEnum.java | 71 ++ .../org/dubhe/k8s/enums/LimitsOfResourcesEnum.java | 5 +- .../dubhe/k8s/listener/DefaultPodExecListener.java | 87 ++ .../dubhe/k8s/service/K8sCallbackEventService.java | 59 + .../org/dubhe/k8s/service/K8sGpuConfigService.java | 60 + .../java/org/dubhe/k8s/service/K8sNodeService.java | 87 ++ .../service/impl/K8sCallbackEventServiceImpl.java | 139 +++ .../k8s/service/impl/K8sGpuConfigServiceImpl.java | 121 ++ .../java/org/dubhe/k8s/utils/BizConvertUtils.java | 12 +- .../java/org/dubhe/k8s/utils/K8sCallBackTool.java | 8 +- .../java/org/dubhe/k8s/utils/K8sCommonUtils.java | 64 + .../java/org/dubhe/k8s/utils/PrometheusUtil.java | 109 +- .../org/dubhe/k8s/utils/ResourceBuildUtils.java | 28 +- .../common-k8s/src/main/resources/key/id_rsa | 27 + .../common-k8s/src/main/resources/key/id_rsa.pub | 1 + 
.../common-k8s/src/main/resources/kubeconfig_ai | 19 + .../common-k8s/src/main/resources/kubeconfig_dev | 19 + .../common-k8s/src/main/resources/kubeconfig_pre | 19 + .../common-k8s/src/main/resources/kubeconfig_prod | 8 +- .../common-k8s/src/main/resources/kubeconfig_test | 19 + .../main/resources/mapper/K8sGpuConfigMapper.xml | 12 + .../org/dubhe/recycle/config/RecycleConfig.java | 4 + .../org/dubhe/recycle/enums/RecycleModuleEnum.java | 3 +- .../dubhe/recycle/enums/RecycleResourceEnum.java | 9 + .../java/org/dubhe/recycle/utils/RecycleTool.java | 2 +- .../algorithm/async/TrainAlgorithmUploadAsync.java | 10 - .../domain/dto/PtTrainAlgorithmCreateDTO.java | 2 +- .../domain/vo/PtTrainAlgorithmQueryVO.java | 4 + .../service/impl/PtTrainAlgorithmServiceImpl.java | 55 +- .../src/main/resources/bootstrap.yml | 6 +- .../src/main/resources/bootstrap.yml | 6 +- .../org/dubhe/task/data/DataTaskExecuteThread.java | 2 +- .../src/main/resources/bootstrap.yml | 6 +- .../java/org/dubhe/data/constant/ErrorEnum.java | 1 + .../dubhe/data/dao/DatasetVersionFileMapper.java | 8 + .../main/java/org/dubhe/data/dao/FileMapper.java | 3 +- .../org/dubhe/data/domain/bo/FileUploadBO.java | 40 + .../dubhe/data/domain/dto/BatchFileCreateDTO.java | 1 + .../dubhe/data/domain/vo/LabelGroupQueryVO.java | 4 + .../java/org/dubhe/data/rest/FileController.java | 7 + .../org/dubhe/data/rest/LabelGroupController.java | 6 +- .../data/service/DatasetVersionFileService.java | 8 + .../java/org/dubhe/data/service/FileService.java | 2 +- .../org/dubhe/data/service/LabelGroupService.java | 4 +- .../data/service/impl/DatasetServiceImpl.java | 142 ++- .../impl/DatasetVersionFileServiceImpl.java | 11 + .../service/impl/DatasetVersionServiceImpl.java | 9 + .../dubhe/data/service/impl/FileServiceImpl.java | 4 +- .../data/service/impl/LabelGroupServiceImpl.java | 33 +- .../java/org/dubhe/data/util/ConversionUtil.java | 15 +- .../dubhe-data/src/main/resources/bootstrap.yml | 6 +- 
.../resources/mapper/DatasetVersionFileMapper.xml | 25 + .../src/main/resources/mapper/FileMapper.xml | 11 + .../dubhe/image/async/HarborImagePushAsync.java | 7 +- .../dubhe/image/domain/dto/PtImageQueryDTO.java | 3 - .../image/domain/dto/PtImageQueryImageDTO.java | 3 - .../image/domain/dto/PtImageQueryNameDTO.java | 7 +- .../dubhe/image/domain/dto/PtImageQueryUrlDTO.java | 3 + .../dubhe/image/domain/dto/PtImageUploadDTO.java | 4 - .../org/dubhe/image/domain/entity/PtImage.java | 8 +- .../org/dubhe/image/domain/vo/PtImageQueryVO.java | 10 + .../org/dubhe/image/rest/PtImageController.java | 4 +- .../org/dubhe/image/service/PtImageService.java | 14 +- .../image/service/impl/PtImageServiceImpl.java | 223 ++-- .../dubhe-image/src/main/resources/bootstrap.yml | 6 +- dubhe-server/dubhe-k8s/pom.xml | 5 + .../org/dubhe/dubhek8s/domain/dto/NodeDTO.java | 20 +- .../dubhe/dubhek8s/domain/vo/GpuResourceVO.java | 50 + .../dubhe/dubhek8s/domain/vo/K8sAllResourceVO.java | 94 ++ .../org/dubhe/dubhek8s/domain/vo/NamespaceVO.java | 17 +- .../org/dubhe/dubhek8s/domain/vo/PodResVO.java | 5 + .../event/callback/DeploymentCallback.java | 2 +- .../dubhe/dubhek8s/event/callback/PodCallback.java | 97 +- .../dubhe/dubhek8s/handler/WebSocketServer.java | 14 +- .../dubhe/dubhek8s/observer/TadlTrialObserver.java | 70 ++ .../dubhe/dubhek8s/rest/GpuConfigController.java | 48 + .../dubhek8s/rest/ResourceQuotaController.java | 2 +- .../dubhek8s/rest/SystemNamespaceController.java | 39 +- .../dubhe/dubhek8s/rest/SystemNodeController.java | 43 +- .../dubhe/dubhek8s/service/GpuConfigService.java | 32 + .../dubhek8s/service/ResourceQuotaService.java | 2 +- .../dubhek8s/service/SystemNamespaceService.java | 9 + .../dubhe/dubhek8s/service/SystemNodeService.java | 41 +- .../service/impl/GpuConfigServiceImpl.java | 55 + .../service/impl/ResourceQuotaServiceImpl.java | 4 +- .../service/impl/SystemNamespaceServiceImpl.java | 292 +++-- .../service/impl/SystemNodeServiceImpl.java | 394 +++++- 
.../dubhe-k8s/src/main/resources/bootstrap.yml | 6 +- .../org/dubhe/dubhek8s/k8s/PodCallbackTest.java | 2 +- .../dubhek8s/k8s/api/LogMonitoringApiTest.java | 10 + .../org/dubhe/dubhek8s/k8s/api/NodeApiTest.java | 36 +- .../dubhek8s/k8s/api/ResourceQuotaApiTest.java | 37 +- .../dubhek8s/k8s/api/SystemNamespaceTest.java | 45 + .../org/dubhe/dubhek8s/k8s/api/SystemNodeTest.java | 53 + .../dubhe/dubhek8s/k8s/api/TrainJobApiTest.java | 32 +- .../measure/async/GenerateMeasureFileAsync.java | 7 +- .../dubhe/measure/constant/MeasureConstants.java | 5 + .../dubhe/measure/domain/vo/PtMeasureQueryVO.java | 3 + .../measure/service/impl/PtMeasureServiceImpl.java | 26 +- .../dubhe-measure/src/main/resources/bootstrap.yml | 6 +- .../model/domain/dto/PtModelBranchCreateDTO.java | 4 +- .../model/domain/dto/PtModelInfoCreateDTO.java | 4 +- .../org/dubhe/model/domain/entity/PtModelInfo.java | 6 + .../service/impl/PtModelBranchServiceImpl.java | 57 +- .../model/service/impl/PtModelInfoServiceImpl.java | 26 +- .../dubhe-model/src/main/resources/bootstrap.yml | 6 +- .../dubhe/notebook/convert/NoteBookConvert.java | 5 +- .../notebook/convert/NoteBookConvertImpl.java | 147 +++ .../notebook/convert/PtJupyterResourceConvert.java | 16 +- .../org/dubhe/notebook/dao/NoteBookMapper.java | 4 + .../notebook/domain/dto/NoteBookCreateDTO.java | 23 +- .../org/dubhe/notebook/domain/entity/NoteBook.java | 26 +- .../org/dubhe/notebook/domain/vo/NoteBookVO.java | 122 ++ .../dubhe/notebook/enums/NoteBookStatusEnum.java | 8 - .../dubhe/notebook/rest/NoteBookController.java | 7 +- .../dubhe/notebook/service/NoteBookService.java | 2 +- .../service/impl/NoteBookAsyncServiceImpl.java | 25 +- .../notebook/service/impl/NoteBookServiceImpl.java | 86 +- .../notebook/task/NoteBookStatusRefreshTask.java | 1 - .../org/dubhe/notebook/utils/NotebookUtil.java | 2 +- .../src/main/resources/bootstrap.yml | 6 +- .../optimize/domain/dto/ModelOptTaskCreateDTO.java | 32 + .../optimize/domain/dto/ModelOptTaskUpdateDTO.java | 
31 + .../dubhe/optimize/domain/entity/ModelOptTask.java | 36 + .../domain/entity/ModelOptTaskInstance.java | 57 + .../domain/vo/ModelOptTaskInstanceQueryVO.java | 38 +- .../optimize/domain/vo/ModelOptTaskQueryVO.java | 24 + .../optimize/rest/ModelOptTaskController.java | 14 +- .../rest/ModelOptTaskInstanceController.java | 10 +- .../service/ModelOptTaskInstanceService.java | 6 +- .../optimize/service/ModelOptTaskService.java | 14 +- .../impl/ModelOptTaskInstanceServiceImpl.java | 79 +- .../service/impl/ModelOptTaskServiceImpl.java | 87 +- .../src/main/resources/bootstrap.yml | 6 +- .../src/main/resources/bootstrap.yml | 6 +- .../org/dubhe/serving/dao/ServingInfoMapper.java | 13 +- .../serving/domain/dto/BatchServingCreateDTO.java | 9 + .../serving/domain/dto/BatchServingUpdateDTO.java | 9 + .../serving/domain/dto/ServingModelConfigDTO.java | 9 + .../dubhe/serving/domain/entity/BatchServing.java | 16 +- .../serving/domain/entity/ServingModelConfig.java | 16 +- .../serving/domain/vo/BatchServingDetailVO.java | 9 + .../serving/domain/vo/BatchServingQueryVO.java | 3 + .../serving/domain/vo/ServingInfoDetailVO.java | 1 + .../serving/domain/vo/ServingInfoQueryVO.java | 3 + .../serving/domain/vo/ServingModelConfigVO.java | 11 +- .../dubhe/serving/rest/BatchServingController.java | 7 +- .../org/dubhe/serving/rest/ServingController.java | 25 +- .../dubhe/serving/service/BatchServingService.java | 6 +- .../org/dubhe/serving/service/ServingService.java | 21 +- .../service/impl/BatchServingServiceImpl.java | 117 +- .../serving/service/impl/ServingServiceImpl.java | 180 ++- .../dubhe/serving/task/DeployServingAsyncTask.java | 57 +- .../java/org/dubhe/serving/utils/GrpcClient.java | 5 +- .../dubhe-serving/src/main/resources/bootstrap.yml | 6 +- .../src/main/resources/server_dev.crt | 3 + .../src/main/resources/server_pre.crt | 3 + .../src/main/resources/server_prod.crt | 19 +- .../src/main/resources/server_test.crt | 20 +- dubhe-server/dubhe-tadl/pom.xml | 147 +++ 
.../main/java/org/dubhe/tadl/TadlApplication.java | 40 + .../org/dubhe/tadl/client/AdminServiceClient.java | 44 + .../tadl/client/fallback/AdminServiceFallback.java | 45 + .../main/java/org/dubhe/tadl/config/CmdConf.java | 68 ++ .../config/RedisStreamListenerContainerConfig.java | 126 ++ .../java/org/dubhe/tadl/config/TadlJobConfig.java | 44 + .../org/dubhe/tadl/config/TrialPoolConfig.java | 66 + .../org/dubhe/tadl/constant/RedisKeyConstant.java | 121 ++ .../java/org/dubhe/tadl/constant/TadlConstant.java | 134 +++ .../java/org/dubhe/tadl/dao/AlgorithmMapper.java | 28 + .../org/dubhe/tadl/dao/AlgorithmStageMapper.java | 37 + .../org/dubhe/tadl/dao/AlgorithmVersionMapper.java | 45 + .../java/org/dubhe/tadl/dao/ExperimentMapper.java | 45 + .../org/dubhe/tadl/dao/ExperimentStageMapper.java | 60 + .../java/org/dubhe/tadl/dao/TrialDataMapper.java | 46 + .../main/java/org/dubhe/tadl/dao/TrialMapper.java | 97 ++ .../dubhe/tadl/domain/bo/IntermediateAccuracy.java | 52 + .../main/java/org/dubhe/tadl/domain/bo/Label.java | 39 + .../org/dubhe/tadl/domain/bo/TadlTrialOutput.java | 29 + .../dubhe/tadl/domain/dto/AlgorithmCreateDTO.java | 171 +++ .../dubhe/tadl/domain/dto/AlgorithmUpdateDTO.java | 184 +++ .../tadl/domain/dto/AlgorithmVersionCreateDTO.java | 76 ++ .../tadl/domain/dto/AlgorithmVersionDeleteDTO.java | 34 + .../tadl/domain/dto/AlgorithmVersionSwitchDTO.java | 35 + .../tadl/domain/dto/AlgorithmYamlQueryDTO.java | 43 + .../tadl/domain/dto/ExperimentAndTrailDTO.java | 54 + .../tadl/domain/dto/ExperimentBestAccuracyDTO.java | 44 + .../dubhe/tadl/domain/dto/ExperimentCreateDTO.java | 151 +++ .../dto/ExperimentIntermediateAccuracyDTO.java | 49 + .../tadl/domain/dto/ExperimentLogQueryDTO.java | 41 + .../tadl/domain/dto/ExperimentMessageDTO.java | 43 + .../dubhe/tadl/domain/dto/ExperimentPauseDTO.java | 33 + .../dubhe/tadl/domain/dto/ExperimentQueryDTO.java | 67 ++ .../tadl/domain/dto/ExperimentRestartDTO.java | 42 + .../tadl/domain/dto/ExperimentRunTimeDTO.java | 44 + 
.../domain/dto/ExperimentStageDetailUpdateDTO.java | 55 + .../dubhe/tadl/domain/dto/ExperimentStopDTO.java | 42 + .../dubhe/tadl/domain/dto/ExperimentUpdateDTO.java | 162 +++ .../tadl/domain/dto/MaxExecDurationUpdateDTO.java | 57 + .../tadl/domain/dto/MaxTrialNumUpdateDTO.java | 52 + .../dto/TadlTrialIntermediateAccuracyDTO.java | 58 + .../domain/dto/TrialConcurrentNumUpdateDTO.java | 53 + .../java/org/dubhe/tadl/domain/dto/TrialDTO.java | 54 + .../org/dubhe/tadl/domain/dto/TrialDeleteDTO.java | 43 + .../domain/dto/TrialK8sPodCallBackCreateDTO.java | 37 + .../dubhe/tadl/domain/dto/TrialLogQueryDTO.java | 41 + .../dubhe/tadl/domain/dto/TrialRunParamDTO.java | 110 ++ .../dubhe/tadl/domain/dto/TrialStopParamDTO.java | 46 + .../dubhe/tadl/domain/dto/UpdateStageYamlDTO.java | 55 + .../org/dubhe/tadl/domain/entity/Algorithm.java | 102 ++ .../dubhe/tadl/domain/entity/AlgorithmStage.java | 164 +++ .../dubhe/tadl/domain/entity/AlgorithmVersion.java | 70 ++ .../org/dubhe/tadl/domain/entity/Experiment.java | 119 ++ .../dubhe/tadl/domain/entity/ExperimentStage.java | 156 +++ .../java/org/dubhe/tadl/domain/entity/Trial.java | 146 +++ .../org/dubhe/tadl/domain/entity/TrialData.java | 85 ++ .../tadl/domain/vo/AlgorithmNextVersionVO.java | 41 + .../org/dubhe/tadl/domain/vo/AlgorithmStageVO.java | 122 ++ .../java/org/dubhe/tadl/domain/vo/AlgorithmVO.java | 89 ++ .../dubhe/tadl/domain/vo/AlgorithmVersionVO.java | 66 + .../dubhe/tadl/domain/vo/BestAccuracyDataVO.java | 25 + .../dubhe/tadl/domain/vo/BestAccuracyOutVO.java | 34 + .../org/dubhe/tadl/domain/vo/ExperimentFileVO.java | 36 + .../dubhe/tadl/domain/vo/ExperimentLogQueryVO.java | 50 + .../dubhe/tadl/domain/vo/ExperimentQueryVO.java | 86 ++ .../tadl/domain/vo/ExperimentStageParamVO.java | 51 + .../dubhe/tadl/domain/vo/ExperimentStageVO.java | 144 +++ .../org/dubhe/tadl/domain/vo/ExperimentVO.java | 181 +++ .../org/dubhe/tadl/domain/vo/FieldConfigOutVO.java | 40 + .../tadl/domain/vo/IntermediateAccuracyDataVO.java | 26 + 
.../tadl/domain/vo/IntermediateAccuracyVO.java | 35 + .../org/dubhe/tadl/domain/vo/RumTimeDataVO.java | 25 + .../org/dubhe/tadl/domain/vo/RunTimeOutVO.java | 34 + .../org/dubhe/tadl/domain/vo/RuntimeParamVO.java | 47 + .../org/dubhe/tadl/domain/vo/StageOutlineVO.java | 32 + .../dubhe/tadl/domain/vo/TadlTrialAccuracyVO.java | 39 + .../domain/vo/TrialIntermediateAccuracyVO.java | 28 + .../java/org/dubhe/tadl/domain/vo/TrialListVO.java | 90 ++ .../org/dubhe/tadl/domain/vo/TrialLogQueryVO.java | 50 + .../org/dubhe/tadl/domain/vo/TrialResultVO.java | 26 + .../dubhe/tadl/domain/vo/TrialResultValueVO.java | 27 + .../java/org/dubhe/tadl/domain/vo/TrialVO.java | 85 ++ .../dubhe/tadl/enums/ExperimentStageStateEnum.java | 100 ++ .../org/dubhe/tadl/enums/ExperimentStatusEnum.java | 195 +++ .../java/org/dubhe/tadl/enums/ModelTypeEnum.java | 55 + .../main/java/org/dubhe/tadl/enums/StageEnum.java | 108 ++ .../java/org/dubhe/tadl/enums/TadlErrorEnum.java | 78 ++ .../java/org/dubhe/tadl/enums/TimeUnitEnum.java | 104 ++ .../java/org/dubhe/tadl/enums/TrialStatusEnum.java | 106 ++ .../dubhe/tadl/listener/RedisStreamListener.java | 41 + .../constant/ExperimentEventMachineConstant.java | 59 + .../ExperimentStageEventMachineConstant.java | 57 + .../constant/TrialEventMachineConstant.java | 63 + .../tadl/machine/proxy/StateMachineProxy.java | 61 + .../state/AbstractExperimentStageState.java | 80 ++ .../machine/state/AbstractExperimentState.java | 61 + .../tadl/machine/state/AbstractTrialState.java | 69 ++ .../specific/experiment/FailedExperimentState.java | 68 ++ .../experiment/FinishedExperimentState.java | 43 + .../specific/experiment/PausedExperimentState.java | 88 ++ .../experiment/RunningExperimentState.java | 77 ++ .../specific/experiment/ToRunExperimentState.java | 63 + .../experiment/WaitingExperimentState.java | 69 ++ .../FailedExperimentStageState.java | 61 + .../FinishedExperimentStageState.java | 30 + .../RunningExperimentStageState.java | 153 +++ 
.../experimentstage/ToRunExperimentStageState.java | 81 ++ .../state/specific/trial/FailedTrialState.java | 68 ++ .../state/specific/trial/FinishedTrialState.java | 29 + .../state/specific/trial/RunningTrialState.java | 102 ++ .../state/specific/trial/ToRunTrialState.java | 79 ++ .../state/specific/trial/UnknownTrialState.java | 68 ++ .../state/specific/trial/WaitingTrialState.java | 105 ++ .../statemachine/ExperimentStageStateMachine.java | 185 +++ .../statemachine/ExperimentStateMachine.java | 191 +++ .../machine/statemachine/GlobalStateMachine.java | 48 + .../machine/statemachine/TrialStateMachine.java | 238 ++++ .../utils/identify/StateMachineStatusUtil.java | 55 + .../machine/utils/identify/StateMachineUtil.java | 50 + .../tadl/machine/utils/identify/data/DataHub.java | 93 ++ .../setting/ExperimentStageStateSelect.java | 44 + .../identify/setting/ExperimentStateSelect.java | 44 + .../identify/setting/StateIdentifySetting.java | 47 + .../org/dubhe/tadl/rest/AlgorithmController.java | 121 ++ .../org/dubhe/tadl/rest/ExperimentController.java | 156 +++ .../dubhe/tadl/rest/ExperimentStageController.java | 109 ++ .../dubhe/tadl/rest/K8sCallbackPodController.java | 64 + .../org/dubhe/tadl/rest/TadlTrialController.java | 53 + .../java/org/dubhe/tadl/schedule/TadlSchedule.java | 125 ++ .../org/dubhe/tadl/service/AlgorithmService.java | 131 ++ .../dubhe/tadl/service/AlgorithmStageService.java | 80 ++ .../tadl/service/AlgorithmVersionService.java | 97 ++ .../org/dubhe/tadl/service/ExperimentService.java | 213 ++++ .../dubhe/tadl/service/ExperimentStageService.java | 206 ++++ .../org/dubhe/tadl/service/TadlRedisService.java | 75 ++ .../org/dubhe/tadl/service/TadlTrialService.java | 118 ++ .../org/dubhe/tadl/service/TrialDataService.java | 60 + .../tadl/service/impl/AlgorithmServiceImpl.java | 712 +++++++++++ .../service/impl/AlgorithmStageServiceImpl.java | 107 ++ .../service/impl/AlgorithmVersionServiceImpl.java | 123 ++ .../tadl/service/impl/ExperimentServiceImpl.java | 
1258 ++++++++++++++++++++ .../service/impl/ExperimentStageServiceImpl.java | 917 ++++++++++++++ .../tadl/service/impl/TadlRedisServiceImpl.java | 656 ++++++++++ .../tadl/service/impl/TadlTrialServiceImpl.java | 304 +++++ .../tadl/service/impl/TrialAsyncServiceImpl.java | 54 + .../tadl/service/impl/TrialDataServiceImpl.java | 79 ++ .../dubhe/tadl/task/TadlAlgorithmRecycleFile.java | 74 ++ .../dubhe/tadl/task/TadlExperimentRecycleFile.java | 69 ++ .../org/dubhe/tadl/task/TrialJobAsyncTask.java | 219 ++++ .../main/java/org/dubhe/tadl/utils/CmdUtil.java | 262 ++++ .../org/dubhe/tadl/utils/KeyNameConvertUtil.java | 73 ++ .../main/java/org/dubhe/tadl/utils/PathUtil.java | 274 +++++ .../org/dubhe/tadl/utils/TimeCalculateUtil.java | 103 ++ .../java/org/dubhe/tadl/utils/YamlParseUtil.java | 68 ++ .../dubhe-tadl/src/main/resources/banner.txt | 11 + .../dubhe-tadl/src/main/resources/bootstrap.yml | 44 + .../src/main/resources/mapper/AlgorithmMapper.xml | 6 + .../main/resources/mapper/AlgorithmStageMapper.xml | 6 + .../resources/mapper/AlgorithmVersionMapper.xml | 6 + .../src/main/resources/mapper/ExperimentMapper.xml | 31 + .../resources/mapper/ExperimentStageMapper.xml | 57 + .../src/main/resources/mapper/TrialDataMapper.xml | 25 + .../src/main/resources/mapper/TrialMapper.xml | 70 ++ .../java/org/dubhe/tadl/TrialJobAsyncTaskTest.java | 66 + dubhe-server/dubhe-terminal/pom.xml | 5 + .../terminal/async/PreserveTerminalAsync.java | 83 ++ .../dubhe/terminal/config/TerminalPoolConfig.java | 81 ++ .../terminal/domain/dto/TerminalCreateDTO.java | 2 +- .../terminal/domain/dto/TerminalDetailDTO.java | 37 + .../dubhe/terminal/domain/dto/TerminalInfoDTO.java | 20 +- .../org/dubhe/terminal/domain/entity/Terminal.java | 7 + .../dubhe/terminal/domain/entity/TerminalInfo.java | 35 +- .../dubhe/terminal/domain/vo/TerminalInfoVO.java | 22 +- .../org/dubhe/terminal/domain/vo/TerminalVO.java | 19 +- .../dubhe/terminal/rest/TerminalController.java | 17 +- 
.../terminal/service/TerminalInfoService.java | 18 + .../dubhe/terminal/service/TerminalService.java | 59 +- .../service/impl/TerminalInfoServiceImpl.java | 32 + .../terminal/service/impl/TerminalServiceImpl.java | 682 +++++++---- .../src/main/resources/bootstrap.yml | 4 +- .../src/test/java/service/TerminalServiceTest.java | 82 +- .../java/org/dubhe/train/TrainApplication.java | 2 + .../java/org/dubhe/train/async/TrainJobAsync.java | 134 +-- .../org/dubhe/train/config/TrainJobConfig.java | 6 + .../org/dubhe/train/constant/TrainConstant.java | 16 + .../dubhe/train/constant/TrainErrorConstant.java | 26 + .../java/org/dubhe/train/dao/PtJobParamMapper.java | 11 + .../java/org/dubhe/train/dao/PtTrainJobMapper.java | 17 +- .../dubhe/train/domain/dto/BaseTrainJobDTO.java | 12 + .../dubhe/train/domain/dto/PtTrainJobBaseDTO.java | 6 +- .../train/domain/dto/PtTrainJobCreateDTO.java | 20 + .../train/domain/dto/PtTrainJobDetailQueryDTO.java | 2 +- .../train/domain/dto/PtTrainJobUpdateDTO.java | 19 + .../domain/dto/PtTrainJobVersionQueryDTO.java | 4 + .../train/domain/dto/PtTrainParamCreateDTO.java | 33 +- .../train/domain/dto/PtTrainParamUpdateDTO.java | 32 +- .../train/domain/dto/PtTrainQueryByIdDTO.java | 43 + .../dubhe/train/domain/dto/PtTrainQueryDTO.java | 3 + .../org/dubhe/train/domain/entity/PtJobParam.java | 18 + .../org/dubhe/train/domain/entity/PtTrainJob.java | 22 + .../dubhe/train/domain/entity/PtTrainParam.java | 24 + .../train/domain/vo/PtImageAndAlgorithmVO.java | 3 + .../train/domain/vo/PtTrainJobDetailQueryVO.java | 15 + .../dubhe/train/domain/vo/PtTrainJobDetailVO.java | 20 + .../dubhe/train/domain/vo/PtTrainParamQueryVO.java | 23 +- .../dubhe/train/domain/vo/PtTrainQueryByIdVO.java | 63 + .../java/org/dubhe/train/domain/vo/PtTrainVO.java | 3 + .../dubhe/train/domain/vo/VisualTrainQueryVO.java | 3 + .../dubhe/train/enums/TrainSystemRunParamEnum.java | 126 ++ .../train/enums/TrainSystemRunParamTypeEnum.java | 49 + 
.../dubhe/train/inner/RunCommandInnerService.java | 36 + .../dubhe/train/inner/TrainFileInnerService.java | 61 + .../train/inner/factory/SystemRunParamFactory.java | 53 + .../inner/handler/SystemMountRunParamHandler.java | 91 ++ .../inner/handler/SystemNormalRunParamHandler.java | 49 + .../inner/handler/SystemOutRunParamHandler.java | 102 ++ .../train/inner/handler/SystemRunParamHandler.java | 64 + .../inner/impl/RunCommandInnerServiceImpl.java | 80 ++ .../inner/impl/TrainFileInnerServiceImpl.java | 96 ++ .../org/dubhe/train/rest/PtTrainJobController.java | 54 +- .../org/dubhe/train/service/PtTrainJobService.java | 37 +- .../service/impl/AlgorithmAsyncServiceImpl.java | 65 +- .../train/service/impl/PtTrainJobServiceImpl.java | 515 ++++++-- .../service/impl/PtTrainParamServiceImpl.java | 158 ++- .../dubhe/train/task/ClearFailedTrainJobTask.java | 66 + .../src/main/resources/bean/dubhe-train-spring.xml | 20 + .../dubhe-train/src/main/resources/bootstrap.yml | 6 +- .../src/main/resources/mapper/PtTrainJobMapper.xml | 8 + .../java/org/dubhe/train/TrainParamApiTest.java | 11 +- .../gateway/src/main/resources/bootstrap.yml | 6 +- dubhe-server/kubeconfig_dev | 19 + dubhe-server/pom.xml | 31 +- dubhe-server/sql/01-Dubhe-DDL.sql | 209 +++- dubhe-server/sql/02-Dubhe-DML.sql | 49 +- dubhe-server/sql/09-Dubhe-Patch.sql | 409 ++++--- dubhe-server/yaml/admin.yaml | 4 +- dubhe-server/yaml/common-biz.yaml | 24 +- dubhe-server/yaml/common-k8s.yaml | 21 +- dubhe-server/yaml/common-recycle.yaml | 2 + dubhe-server/yaml/common-shardingjdbc.yaml | 4 +- dubhe-server/yaml/dubhe-data-dcm.yaml | 5 +- dubhe-server/yaml/dubhe-data-task.yaml | 7 +- dubhe-server/yaml/dubhe-data.yaml | 8 +- dubhe-server/yaml/dubhe-terminal.yaml | 2 +- dubhe-server/yaml/dubhe-train.yaml | 3 + dubhe-server/yaml/gateway.yaml | 258 ++-- dubhe-server/yaml/image.yaml | 6 +- dubhe-server/yaml/measure.yaml | 2 +- 602 files changed, 31090 insertions(+), 2406 deletions(-) create mode 100644 
dubhe-server/admin/src/main/java/org/dubhe/admin/async/CleanupUserResourcesAsync.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/GpuConfigClient.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceNamespaceClient.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNamespaceClient.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNodeClient.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/GpuConfigClientFallback.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceNamespaceClientFallback.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNamespaceClientFallback.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNodeClientFallback.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/GpuConfigTemplateClient.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ObtainAccessToken.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ResourceQuotaTemplateClient.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/config/AdminPoolConfig.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/dao/GpuResourceMapper.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserGpuConfigMapper.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceCreateDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceDeleteDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceQueryDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceUpdateDTO.java 
create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/QueryUserResourceSpecsDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGpuResourceQueryDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGroupConfigSaveDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceListDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceQueryDTO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/GpuResource.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserGpuConfig.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/GpuResourceQueryVO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserLimitConfigVO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserResourceResVO.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/enums/ResourceTypeEnum.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/enums/StatTypeEnum.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/rest/GpuResourceController.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserResourceController.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/service/GpuResourceService.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserResourceService.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/GpuResourceServiceImpl.java create mode 100644 dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserResourceServiceImpl.java create mode 100644 dubhe-server/admin/src/main/resources/mapper/UserGpuConfigMapper.xml create mode 100644 
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/DeleteDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/GpuConfigDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/NamespaceDeleteDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryUserK8sResourceDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ResourceQuotaDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserGpuConfigDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserConfigSaveDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserGpuConfigDTO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/CommandUtil.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MapUtil.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/GpuAllotVO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryUserResourceSpecsVO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotResourceVO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotVO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserConfigVO.java create mode 100644 dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserGpuConfigVO.java create mode 100644 dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/BaseLogQuery.java create mode 100644 dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/entity/PageResult.java create mode 100644 dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-ai.yaml create mode 100644 
dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-open-dev.yml create mode 100644 dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-opendev.yaml create mode 100644 dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ServiceApi.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ServiceApiImpl.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/PromethuesConfig.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sCallbackEventMapper.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sGpuConfigMapper.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sNodeMapper.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sTaskIdentifyMapper.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BaseResourceBo.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PromethusNodeMetricsBo.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/K8sGpuConfigDTO.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/NodeInfoDTO.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sCallbackEvent.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sGpuConfig.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sNode.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sTaskIdentify.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerLastStateTerminated.java create mode 100644 
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sEventVO.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sResourceEventResultVO.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/K8sEventTypeEnum.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/listener/DefaultPodExecListener.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sCallbackEventService.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sGpuConfigService.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sNodeService.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sCallbackEventServiceImpl.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sGpuConfigServiceImpl.java create mode 100644 dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCommonUtils.java create mode 100644 dubhe-server/common-k8s/src/main/resources/key/id_rsa create mode 100644 dubhe-server/common-k8s/src/main/resources/key/id_rsa.pub create mode 100644 dubhe-server/common-k8s/src/main/resources/kubeconfig_ai create mode 100644 dubhe-server/common-k8s/src/main/resources/kubeconfig_dev create mode 100644 dubhe-server/common-k8s/src/main/resources/kubeconfig_pre create mode 100644 dubhe-server/common-k8s/src/main/resources/kubeconfig_test create mode 100644 dubhe-server/common-k8s/src/main/resources/mapper/K8sGpuConfigMapper.xml create mode 100644 dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/bo/FileUploadBO.java create mode 100644 dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/GpuResourceVO.java create mode 100644 dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/K8sAllResourceVO.java create mode 100644 
dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/observer/TadlTrialObserver.java create mode 100644 dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/rest/GpuConfigController.java create mode 100644 dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/GpuConfigService.java create mode 100644 dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/impl/GpuConfigServiceImpl.java create mode 100644 dubhe-server/dubhe-k8s/src/test/java/org/dubhe/dubhek8s/k8s/api/SystemNamespaceTest.java create mode 100644 dubhe-server/dubhe-k8s/src/test/java/org/dubhe/dubhek8s/k8s/api/SystemNodeTest.java create mode 100644 dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/NoteBookConvertImpl.java create mode 100644 dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/vo/NoteBookVO.java create mode 100644 dubhe-server/dubhe-serving/src/main/resources/server_dev.crt create mode 100644 dubhe-server/dubhe-serving/src/main/resources/server_pre.crt create mode 100644 dubhe-server/dubhe-tadl/pom.xml create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/TadlApplication.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/client/AdminServiceClient.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/client/fallback/AdminServiceFallback.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/config/CmdConf.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/config/RedisStreamListenerContainerConfig.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/config/TadlJobConfig.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/config/TrialPoolConfig.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/constant/RedisKeyConstant.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/constant/TadlConstant.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/dao/AlgorithmMapper.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/dao/AlgorithmStageMapper.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/dao/AlgorithmVersionMapper.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/dao/ExperimentMapper.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/dao/ExperimentStageMapper.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/dao/TrialDataMapper.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/dao/TrialMapper.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/bo/IntermediateAccuracy.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/bo/Label.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/bo/TadlTrialOutput.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/AlgorithmCreateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/AlgorithmUpdateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/AlgorithmVersionCreateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/AlgorithmVersionDeleteDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/AlgorithmVersionSwitchDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/AlgorithmYamlQueryDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentAndTrailDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentBestAccuracyDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentCreateDTO.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentIntermediateAccuracyDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentLogQueryDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentMessageDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentPauseDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentQueryDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentRestartDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentRunTimeDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentStageDetailUpdateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentStopDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/ExperimentUpdateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/MaxExecDurationUpdateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/MaxTrialNumUpdateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TadlTrialIntermediateAccuracyDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TrialConcurrentNumUpdateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TrialDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TrialDeleteDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TrialK8sPodCallBackCreateDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TrialLogQueryDTO.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TrialRunParamDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/TrialStopParamDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/dto/UpdateStageYamlDTO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/entity/Algorithm.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/entity/AlgorithmStage.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/entity/AlgorithmVersion.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/entity/Experiment.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/entity/ExperimentStage.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/entity/Trial.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/entity/TrialData.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/AlgorithmNextVersionVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/AlgorithmStageVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/AlgorithmVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/AlgorithmVersionVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/BestAccuracyDataVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/BestAccuracyOutVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/ExperimentFileVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/ExperimentLogQueryVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/ExperimentQueryVO.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/ExperimentStageParamVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/ExperimentStageVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/ExperimentVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/FieldConfigOutVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/IntermediateAccuracyDataVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/IntermediateAccuracyVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/RumTimeDataVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/RunTimeOutVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/RuntimeParamVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/StageOutlineVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/TadlTrialAccuracyVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/TrialIntermediateAccuracyVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/TrialListVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/TrialLogQueryVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/TrialResultVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/TrialResultValueVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/domain/vo/TrialVO.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/enums/ExperimentStageStateEnum.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/enums/ExperimentStatusEnum.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/enums/ModelTypeEnum.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/enums/StageEnum.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/enums/TadlErrorEnum.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/enums/TimeUnitEnum.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/enums/TrialStatusEnum.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/listener/RedisStreamListener.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/constant/ExperimentEventMachineConstant.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/constant/ExperimentStageEventMachineConstant.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/constant/TrialEventMachineConstant.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/proxy/StateMachineProxy.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/AbstractExperimentStageState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/AbstractExperimentState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/AbstractTrialState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experiment/FailedExperimentState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experiment/FinishedExperimentState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experiment/PausedExperimentState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experiment/RunningExperimentState.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experiment/ToRunExperimentState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experiment/WaitingExperimentState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experimentstage/FailedExperimentStageState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experimentstage/FinishedExperimentStageState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experimentstage/RunningExperimentStageState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/experimentstage/ToRunExperimentStageState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/trial/FailedTrialState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/trial/FinishedTrialState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/trial/RunningTrialState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/trial/ToRunTrialState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/trial/UnknownTrialState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/state/specific/trial/WaitingTrialState.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/statemachine/ExperimentStageStateMachine.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/statemachine/ExperimentStateMachine.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/statemachine/GlobalStateMachine.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/statemachine/TrialStateMachine.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/utils/identify/StateMachineStatusUtil.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/utils/identify/StateMachineUtil.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/utils/identify/data/DataHub.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/utils/identify/setting/ExperimentStageStateSelect.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/utils/identify/setting/ExperimentStateSelect.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/machine/utils/identify/setting/StateIdentifySetting.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/rest/AlgorithmController.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/rest/ExperimentController.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/rest/ExperimentStageController.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/rest/K8sCallbackPodController.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/rest/TadlTrialController.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/schedule/TadlSchedule.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/AlgorithmService.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/AlgorithmStageService.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/AlgorithmVersionService.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/ExperimentService.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/ExperimentStageService.java 
create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/TadlRedisService.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/TadlTrialService.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/TrialDataService.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/AlgorithmServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/AlgorithmStageServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/AlgorithmVersionServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/ExperimentServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/ExperimentStageServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/TadlRedisServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/TadlTrialServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/TrialAsyncServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/service/impl/TrialDataServiceImpl.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/task/TadlAlgorithmRecycleFile.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/task/TadlExperimentRecycleFile.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/task/TrialJobAsyncTask.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/utils/CmdUtil.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/utils/KeyNameConvertUtil.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/utils/PathUtil.java create mode 100644 
dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/utils/TimeCalculateUtil.java create mode 100644 dubhe-server/dubhe-tadl/src/main/java/org/dubhe/tadl/utils/YamlParseUtil.java create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/banner.txt create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/bootstrap.yml create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/mapper/AlgorithmMapper.xml create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/mapper/AlgorithmStageMapper.xml create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/mapper/AlgorithmVersionMapper.xml create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/mapper/ExperimentMapper.xml create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/mapper/ExperimentStageMapper.xml create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/mapper/TrialDataMapper.xml create mode 100644 dubhe-server/dubhe-tadl/src/main/resources/mapper/TrialMapper.xml create mode 100644 dubhe-server/dubhe-tadl/src/test/java/org/dubhe/tadl/TrialJobAsyncTaskTest.java create mode 100644 dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/async/PreserveTerminalAsync.java create mode 100644 dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/config/TerminalPoolConfig.java create mode 100644 dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalDetailDTO.java create mode 100644 dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/TerminalInfoService.java create mode 100644 dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/impl/TerminalInfoServiceImpl.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/constant/TrainErrorConstant.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainQueryByIdDTO.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainQueryByIdVO.java create mode 100644 
dubhe-server/dubhe-train/src/main/java/org/dubhe/train/enums/TrainSystemRunParamEnum.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/enums/TrainSystemRunParamTypeEnum.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/RunCommandInnerService.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/TrainFileInnerService.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/factory/SystemRunParamFactory.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/handler/SystemMountRunParamHandler.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/handler/SystemNormalRunParamHandler.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/handler/SystemOutRunParamHandler.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/handler/SystemRunParamHandler.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/impl/RunCommandInnerServiceImpl.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/inner/impl/TrainFileInnerServiceImpl.java create mode 100644 dubhe-server/dubhe-train/src/main/java/org/dubhe/train/task/ClearFailedTrainJobTask.java create mode 100644 dubhe-server/dubhe-train/src/main/resources/bean/dubhe-train-spring.xml create mode 100644 dubhe-server/kubeconfig_dev diff --git a/dubhe-server/README.md b/dubhe-server/README.md index 1f6a8ed..8d7e72f 100644 --- a/dubhe-server/README.md +++ b/dubhe-server/README.md @@ -11,8 +11,8 @@ Nacos + Fegin + Gateway + (Spring Security + JWT + OAuth2) 初始化sql位置 /sql -**地址:** 127.0.0.1:3306 -**用户名:** test **密码:** test +**地址:** 10.5.29.66:3306 +**用户名:** test **密码:** zj12345678 ### Nacos @@ -24,7 +24,7 @@ Nacos + Fegin + Gateway + (Spring Security + JWT + OAuth2) **详见:** https://nacos.io/zh-cn/docs/quick-start-spring-cloud.html -**地址:** 
http://127.0.0.1:8848/nacos/#/login +**地址:** http://10.105.1.133:8848/nacos/#/login **用户名:** nacos **密码:** nacos diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/AdminApplication.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/AdminApplication.java index d4abdf7..bd24699 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/AdminApplication.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/AdminApplication.java @@ -19,6 +19,8 @@ package org.dubhe.admin; import org.mybatis.spring.annotation.MapperScan; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.scheduling.annotation.EnableAsync; +import org.springframework.scheduling.annotation.EnableScheduling; /** @@ -27,6 +29,8 @@ import org.springframework.boot.autoconfigure.SpringBootApplication; */ @SpringBootApplication(scanBasePackages = "org.dubhe") @MapperScan(basePackages = {"org.dubhe.**.dao"}) +@EnableScheduling +@EnableAsync public class AdminApplication { public static void main(String[] args) { SpringApplication.run(AdminApplication.class, args); diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/async/CleanupUserResourcesAsync.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/async/CleanupUserResourcesAsync.java new file mode 100644 index 0000000..9ab34ba --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/async/CleanupUserResourcesAsync.java @@ -0,0 +1,68 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.async; + +import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; +import org.dubhe.admin.client.SystemNamespaceClient; +import org.dubhe.admin.dao.UserConfigMapper; +import org.dubhe.admin.dao.UserGpuConfigMapper; +import org.dubhe.admin.domain.entity.UserConfig; +import org.dubhe.admin.domain.entity.UserGpuConfig; +import org.dubhe.biz.base.dto.NamespaceDeleteDTO; +import org.dubhe.biz.base.exception.BusinessException; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.scheduling.annotation.Async; +import org.springframework.stereotype.Component; + +import java.util.Set; + +/** + * @description 异步清理用户资源 + * @date 2021-11-25 + */ +@Component +public class CleanupUserResourcesAsync { + + @Autowired + private UserConfigMapper userConfigMapper; + + @Autowired + private UserGpuConfigMapper userGpuConfigMapper; + + @Autowired + private SystemNamespaceClient systemNamespaceClient; + + @Async("adminExecutor") + public void cleanUserResource(Set ids, String accessToken){ + //删除用户资源配置 + QueryWrapper userConfigWrapper = new QueryWrapper<>(); + userConfigWrapper.in("user_id",ids); + userConfigMapper.delete(userConfigWrapper); + QueryWrapper userGpuConfigWrapper = new QueryWrapper<>(); + userGpuConfigWrapper.in("user_id",ids); + userGpuConfigMapper.delete(userGpuConfigWrapper); + //删除用户namespace + NamespaceDeleteDTO namespaceDeleteDTO = new NamespaceDeleteDTO(); + 
namespaceDeleteDTO.setIds(ids); + DataResponseBody dataResponseBody = systemNamespaceClient.deleteNamespace(namespaceDeleteDTO, accessToken); + if (!dataResponseBody.succeed()) { + throw new BusinessException("远程调用k8s删除namespace失败"); + } + } + +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/GpuConfigClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/GpuConfigClient.java new file mode 100644 index 0000000..fae84d6 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/GpuConfigClient.java @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.client; + +import org.dubhe.admin.client.fallback.GpuConfigClientFallback; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.dto.GpuConfigDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; + +/** + * @description 远程调用k8sGPU资源配额 Client + * @date 2021-9-7 + */ +@FeignClient(value = ApplicationNameConst.SERVER_K8S, contextId = "gpuConfigClient",fallback = GpuConfigClientFallback.class) +public interface GpuConfigClient { + /** + * 更新k8sGPU资源配额 + * + * @param gpuConfigDTO k8sGPU资源配额 + * @return + */ + @PostMapping(value = "/gpuConfig/update") + DataResponseBody updateGpuConfig(@RequestBody GpuConfigDTO gpuConfigDTO); + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceNamespaceClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceNamespaceClient.java new file mode 100644 index 0000000..6919434 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceNamespaceClient.java @@ -0,0 +1,60 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.client; + +import org.dubhe.admin.client.fallback.ResourceNamespaceClientFallback; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserAllotVO; +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestParam; + +import java.util.List; +import java.util.Map; + +/** + * @description 远程调用 + * @date 2021-11-19 + */ +@FeignClient(value = ApplicationNameConst.SERVER_K8S, contextId = "ResourceNamespaceClient", fallback = ResourceNamespaceClientFallback.class) +public interface ResourceNamespaceClient { + + /** + * 查看用户资源用量峰值 + * + * @param resourceType 资源类型 + * @param sumDay 统计时间段 + * @return List 用户资源用量峰值VO 实体类 + */ + @GetMapping("/namespace/ResourceUsage") + DataResponseBody> getResourceNamespace(@RequestParam(value = "resourceType") Integer resourceType, + @RequestParam(value = "sumDay") String sumDay); + + /** + * 查询用户某段时间内的资源用量峰值 + * + * @param resourceType 资源类型 + * @param sumDay 统计时间段 + * @param namespaces 用户命名空间 + * @return String 资源用量峰值 + */ + @GetMapping("/namespace/ResourceByUser") + DataResponseBody> getResourceUsageByUser(@RequestParam(value = "resourceType") Integer resourceType, + @RequestParam(value = "sumDay") String sumDay, + @RequestParam(value = "namespaces") String namespaces); +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java index 0aca0d7..744c5cf 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java @@ -17,8 +17,8 @@ package org.dubhe.admin.client; import 
org.dubhe.admin.client.fallback.ResourceQuotaClientFallback; -import org.dubhe.admin.domain.dto.UserConfigDTO; import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.dto.ResourceQuotaDTO; import org.dubhe.biz.base.vo.DataResponseBody; import org.springframework.cloud.openfeign.FeignClient; import org.springframework.web.bind.annotation.PostMapping; @@ -28,14 +28,16 @@ import org.springframework.web.bind.annotation.RequestBody; * @description 远程调用资源配额 Client * @date 2021-7-21 */ -@FeignClient(value = ApplicationNameConst.SERVER_K8S,fallback = ResourceQuotaClientFallback.class) +@FeignClient(value = ApplicationNameConst.SERVER_K8S, contextId = "resourceQuotaClient", fallback = ResourceQuotaClientFallback.class) public interface ResourceQuotaClient { + /** * 更新 ResourceQuota * - * @param userConfigDTO 用户配置信息 + * @param resourceQuotaDTO 用户配置信息 * @return */ @PostMapping(value = "/resourceQuota/update") - DataResponseBody updateResourceQuota(@RequestBody UserConfigDTO userConfigDTO); + DataResponseBody updateResourceQuota(@RequestBody ResourceQuotaDTO resourceQuotaDTO); + } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNamespaceClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNamespaceClient.java new file mode 100644 index 0000000..0ca9073 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNamespaceClient.java @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.client; + +import org.dubhe.admin.client.fallback.SystemNamespaceClientFallback; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.dto.NamespaceDeleteDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestHeader; + +/** + * @description 命名空间状态管理feign远程调用 + * @date 2021-11-26 + */ +@FeignClient(value = ApplicationNameConst.SERVER_K8S, contextId = "systemNamespaceClient", fallback = SystemNamespaceClientFallback.class) +public interface SystemNamespaceClient { + + /** + * 删除用户namespace + * @param namespaceDeleteDTO 用户id + * @return DataResponseBody + */ + @DeleteMapping(value="/namespace") + DataResponseBody deleteNamespace(@RequestBody NamespaceDeleteDTO namespaceDeleteDTO, @RequestHeader("Authorization") String accessToken); +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNodeClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNodeClient.java new file mode 100644 index 0000000..c0fed9c --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/SystemNodeClient.java @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.client; + +import org.dubhe.admin.client.fallback.SystemNodeClientFallback; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.dto.QueryUserK8sResourceDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.QueryUserResourceSpecsVO; +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; + +import java.util.List; + +/** + * @description Feign client that remotely queries whether users' k8s resources are available + * @date 2021-9-7 + */ +@FeignClient(value = ApplicationNameConst.SERVER_K8S, contextId = "systemNodeClient", fallback = SystemNodeClientFallback.class) +public interface SystemNodeClient { + /** + * Queries the k8s resources available to users + * + * @param queryUserK8sResources query conditions for the users' available k8s resources + * @return {@code List<QueryUserResourceSpecsVO>} list of the users' available k8s resources + */ + @PostMapping("/node/queryUserResource") + DataResponseBody<List<QueryUserResourceSpecsVO>> queryUserK8sResource(@RequestBody List<QueryUserK8sResourceDTO> queryUserK8sResources); + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/GpuConfigClientFallback.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/GpuConfigClientFallback.java new file mode 100644 index 0000000..6b8e733 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/GpuConfigClientFallback.java @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.client.fallback; + +import org.dubhe.admin.client.GpuConfigClient; +import org.dubhe.biz.base.dto.GpuConfigDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; + +/** + * @description GpuConfigClient 熔断处理 + * @date 2021-9-7 + */ +public class GpuConfigClientFallback implements GpuConfigClient { + @Override + public DataResponseBody updateGpuConfig(GpuConfigDTO gpuConfigDTO) { + return DataResponseFactory.failed("Call GpuConfig server updateGpuConfig error"); + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceNamespaceClientFallback.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceNamespaceClientFallback.java new file mode 100644 index 0000000..85aceaa --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceNamespaceClientFallback.java @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.client.fallback; + +import org.dubhe.admin.client.ResourceNamespaceClient; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserAllotVO; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; + +import java.util.List; +import java.util.Map; + +/** + * @description + * @date 2021-11-19 + */ +public class ResourceNamespaceClientFallback implements ResourceNamespaceClient { + + @Override + public DataResponseBody> getResourceNamespace(Integer resourceType, String sumDay) { + return DataResponseFactory.failed("Call MetricsApi.getNamespaceUsageRate error"); + } + + @Override + public DataResponseBody> getResourceUsageByUser(Integer resourceType, String sumDay, String namespaces) { + return DataResponseFactory.failed("Call MetricsApi.getResourceUsageByUser error"); + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java index 3adf93f..df24b8f 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java @@ -17,7 +17,7 @@ package org.dubhe.admin.client.fallback; import org.dubhe.admin.client.ResourceQuotaClient; -import org.dubhe.admin.domain.dto.UserConfigDTO; +import org.dubhe.biz.base.dto.ResourceQuotaDTO; 
import org.dubhe.biz.base.vo.DataResponseBody; import org.dubhe.biz.dataresponse.factory.DataResponseFactory; @@ -27,7 +27,7 @@ import org.dubhe.biz.dataresponse.factory.DataResponseFactory; */ public class ResourceQuotaClientFallback implements ResourceQuotaClient { @Override - public DataResponseBody updateResourceQuota(UserConfigDTO userConfigDTO) { + public DataResponseBody updateResourceQuota(ResourceQuotaDTO resourceQuotaDTO) { return DataResponseFactory.failed("Call ResourceQuota server updateResourceQuota error"); } } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNamespaceClientFallback.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNamespaceClientFallback.java new file mode 100644 index 0000000..99b6601 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNamespaceClientFallback.java @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.client.fallback; + +import org.dubhe.admin.client.SystemNamespaceClient; +import org.dubhe.biz.base.dto.NamespaceDeleteDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; + +/** + * @description 命名空间状态管理feign远程调用熔断处理 + * @date 2021-11-26 + */ +public class SystemNamespaceClientFallback implements SystemNamespaceClient { + + @Override + public DataResponseBody deleteNamespace(NamespaceDeleteDTO namespaceDeleteDTO, String accessToken) { + return DataResponseFactory.failed("Call SystemNamespace server deleteNamespace error"); + } + +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNodeClientFallback.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNodeClientFallback.java new file mode 100644 index 0000000..60f49c8 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/SystemNodeClientFallback.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.client.fallback; + +import org.dubhe.admin.client.SystemNodeClient; +import org.dubhe.biz.base.dto.QueryUserK8sResourceDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.QueryUserResourceSpecsVO; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; + +import java.util.List; + +/** + * @description SystemNodeClient 熔断处理 + * @date 2021-9-7 + */ +public class SystemNodeClientFallback implements SystemNodeClient { + + @Override + public DataResponseBody> queryUserK8sResource(List queryUserK8sResources) { + return DataResponseFactory.failed("Call SystemNode server queryUserK8sResource error"); + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/GpuConfigTemplateClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/GpuConfigTemplateClient.java new file mode 100644 index 0000000..a4e2799 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/GpuConfigTemplateClient.java @@ -0,0 +1,60 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.client.template; + +import cn.hutool.http.HttpStatus; +import com.alibaba.fastjson.JSON; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.dto.GpuConfigDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.stereotype.Component; +import org.springframework.web.client.RestTemplate; + +/** + * @description RestTemplate-based client for remotely updating the k8s GPU resource quota + * @date 2021-11-16 + */ +@Component +public class GpuConfigTemplateClient { + + @Autowired + private RestTemplate restTemplate; + + /** + * Updates the k8s GPU resource quota by POSTing the DTO as JSON, sending {@code token} as the Authorization header + * + * @param gpuConfigDTO k8s GPU resource quota + * @return DataResponseBody the response body, or null when the HTTP status is not 200 + */ + public DataResponseBody updateGpuConfig(GpuConfigDTO gpuConfigDTO, String token) { + HttpHeaders headers = new HttpHeaders(); + headers.add(HttpHeaders.AUTHORIZATION, token); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity<String> httpEntity = new HttpEntity<>(JSON.toJSONString(gpuConfigDTO), headers); + ResponseEntity<DataResponseBody> responseEntity = restTemplate.postForEntity("http://" + ApplicationNameConst.SERVER_K8S + "/gpuConfig/update", httpEntity, DataResponseBody.class); + if (HttpStatus.HTTP_OK != responseEntity.getStatusCodeValue()) { + return null; + } + DataResponseBody restResult = responseEntity.getBody(); + return restResult; + } +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ObtainAccessToken.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ObtainAccessToken.java new file mode 100644 index 0000000..b58301d --- /dev/null +++ 
b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ObtainAccessToken.java @@ -0,0 +1,91 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.client.template; + +import cn.hutool.crypto.asymmetric.KeyType; +import cn.hutool.crypto.asymmetric.RSA; +import cn.hutool.http.HttpStatus; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.constant.AuthConst; +import org.dubhe.biz.base.exception.BusinessException; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.stereotype.Component; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.client.RestTemplate; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * @description 模拟登录获取token + * @date 2021-11-11 + */ +@Component +public class ObtainAccessToken { + + @Autowired + private RestTemplate restTemplate; + 
+ @Value("${rsa.private_key}") + private String privateKey; + + /** + * 模拟登录获取token + * @return String token + */ + public String generateToken(String username,String userPassword) { + String password = null; + try { + RSA rsa = new RSA(privateKey, null); + password = new String(rsa.decrypt(userPassword, KeyType.PrivateKey)); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_SYS, "rsa 密钥解析失败, originPassword:{} , 密钥:{},异常:{}", userPassword, KeyType.PrivateKey, e); + throw new BusinessException("密钥解析失败"); + } + MultiValueMap params = new LinkedMultiValueMap<>(); + params.add("grant_type", AuthConst.GRANT_TYPE); + params.add("client_id", AuthConst.CLIENT_ID); + params.add("client_secret", AuthConst.CLIENT_SECRET); + params.add("username", username); + params.add("password", password); + params.add("scope", "all"); + HttpHeaders headers = new HttpHeaders(); + // 需求需要传参为application/x-www-form-urlencoded格式 + headers.setContentType(MediaType.valueOf(MediaType.APPLICATION_FORM_URLENCODED_VALUE)); + HttpEntity> httpEntity = new HttpEntity<>(params, headers); + ResponseEntity responseEntity = restTemplate.postForEntity("http://" + ApplicationNameConst.SERVER_AUTHORIZATION + "/oauth/token", httpEntity, DataResponseBody.class); + if (HttpStatus.HTTP_OK != responseEntity.getStatusCodeValue()) { + return null; + } + DataResponseBody restResult = responseEntity.getBody(); + Map map = new LinkedHashMap(); + if (restResult.succeed()) { + map = (Map) restResult.getData(); + } + // 返回 token + return (String) map.get("token"); + } +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ResourceQuotaTemplateClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ResourceQuotaTemplateClient.java new file mode 100644 index 0000000..09275e5 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/template/ResourceQuotaTemplateClient.java @@ -0,0 +1,60 @@ +/** + * Copyright 2020 Tianshu AI 
Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.client.template; + +import cn.hutool.http.HttpStatus; +import com.alibaba.fastjson.JSON; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.dto.ResourceQuotaDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.stereotype.Component; +import org.springframework.web.client.RestTemplate; + +/** + * @description RestTemplate-based client for remotely updating the resource quota + * @date 2021-11-16 + */ +@Component +public class ResourceQuotaTemplateClient { + + @Autowired + private RestTemplate restTemplate; + + /** + * Updates the ResourceQuota by POSTing the DTO as JSON, sending {@code token} as the Authorization header + * + * @param resourceQuotaDTO user configuration information + * @return DataResponseBody the response body, or null when the HTTP status is not 200 + */ + public DataResponseBody updateResourceQuota(ResourceQuotaDTO resourceQuotaDTO, String token) { + HttpHeaders headers = new HttpHeaders(); + headers.add(HttpHeaders.AUTHORIZATION, token); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity<String> httpEntity = new HttpEntity<>(JSON.toJSONString(resourceQuotaDTO), headers); + ResponseEntity<DataResponseBody> responseEntity = restTemplate.postForEntity("http://" + 
ApplicationNameConst.SERVER_K8S + "/resourceQuota/update", httpEntity, DataResponseBody.class); + if (HttpStatus.HTTP_OK != responseEntity.getStatusCodeValue()) { + return null; + } + DataResponseBody restResult = responseEntity.getBody(); + return restResult; + } +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/config/AdminPoolConfig.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/config/AdminPoolConfig.java new file mode 100644 index 0000000..d09a352 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/config/AdminPoolConfig.java @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.config; + +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.springframework.aop.interceptor.AsyncUncaughtExceptionHandler; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.scheduling.annotation.AsyncConfigurer; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; + +import java.util.concurrent.Executor; +import java.util.concurrent.ThreadPoolExecutor; + +/** + * @description 线程池配置类 + * @date 2020-07-17 + */ +@Configuration +public class AdminPoolConfig implements AsyncConfigurer { + + @Value("${basepool.corePoolSize:40}") + private Integer corePoolSize; + @Value("${basepool.maximumPoolSize:60}") + private Integer maximumPoolSize; + @Value("${basepool.keepAliveTime:120}") + private Integer keepAliveTime; + @Value("${basepool.blockQueueSize:20}") + private Integer blockQueueSize; + + /** + * 异步处理线程池 + * @return Executor 线程实例 + */ + @Bean("adminExecutor") + @Override + public Executor getAsyncExecutor() { + ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor(); + //核心线程数 + taskExecutor.setCorePoolSize(corePoolSize); + taskExecutor.setAllowCoreThreadTimeOut(true); + //最大线程数 + taskExecutor.setMaxPoolSize(maximumPoolSize); + //超时时间 + taskExecutor.setKeepAliveSeconds(keepAliveTime); + //配置队列大小 + taskExecutor.setQueueCapacity(blockQueueSize); + //配置线程池前缀 + taskExecutor.setThreadNamePrefix("async-admin-"); + //拒绝策略 + taskExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.AbortPolicy()); + taskExecutor.initialize(); + return taskExecutor; + } + + @Override + public AsyncUncaughtExceptionHandler getAsyncUncaughtExceptionHandler() { + LogUtil.error(LogEnum.SYS_ERR, "start capturing the exception information of asynchronous task management 
admin-----》》》"); + return (ex, method, params) -> { + LogUtil.error(LogEnum.SYS_ERR, "async admin task failed,the name of admin is {}, params are {}, exception is {}", method.getName(), params, ex); + }; + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/GpuResourceMapper.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/GpuResourceMapper.java new file mode 100644 index 0000000..28c91de --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/GpuResourceMapper.java @@ -0,0 +1,27 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.dubhe.admin.domain.entity.GpuResource; + +/** + * @description GPU资源管理mapper接口 + * @date 2021-08-20 + */ +public interface GpuResourceMapper extends BaseMapper { +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java index 270d4c2..2489998 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java @@ -17,10 +17,15 @@ package org.dubhe.admin.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import org.apache.ibatis.annotations.Param; import org.apache.ibatis.annotations.Select; import org.dubhe.admin.domain.entity.UserConfig; +import org.dubhe.admin.domain.vo.UserLimitConfigVO; +import org.dubhe.biz.base.vo.UserAllotResourceVO; +import org.dubhe.biz.base.vo.UserAllotVO; + import java.util.List; -import org.apache.ibatis.annotations.Param; /** * @description 用户配置 Mapper @@ -33,4 +38,43 @@ public interface UserConfigMapper extends BaseMapper { * @param userConfig 用户配置 */ Long insertOrUpdate(UserConfig userConfig); + + /** + * 统计内存、CPU配额 + */ + @Select("select sum(memory_limit) memoryAllotTotal,sum(cpu_limit) cpuAllotTotal from user_config where deleted=0;") + UserAllotResourceVO selectResourceSum(); + + /** + * 统计CPU配额Top10 + */ + @Select("select u.username,uc.cpu_limit allotTotal from user_config uc, user u where uc.user_id=u.id and uc.deleted=0 order by cpu_limit desc limit 10;") + List selectCpuAllotTotal(); + + /** + * 统计内存配额Top10 + */ + @Select("select u.username,uc.memory_limit allotTotal from user_config uc, user u where uc.user_id=u.id and uc.deleted=0 order by 
memory_limit desc limit 10;") + List selectMemoryAllotTotal(); + + /** + * 根据用户id查询资源配额(cpu、memory) + * + * @param userId 用户ID + * @return 资源配额实体 + */ + @Select("select * from user_config where user_id=#{userId} AND deleted=0") + UserConfig selectLimitSumByUser(@Param("userId") Long userId); + + /** + * 分页查询资源列表 + * + * @param page 分页对象 + * @param sort 排序字段 + * @param order 排序方式 + * @return 用户配额列表 + */ + List selectLimitSum(Page page, + @Param("sort") String sort, + @Param("order") String order); } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserGpuConfigMapper.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserGpuConfigMapper.java new file mode 100644 index 0000000..3311763 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserGpuConfigMapper.java @@ -0,0 +1,81 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.Select; +import org.dubhe.admin.domain.entity.UserGpuConfig; +import org.dubhe.biz.base.vo.GpuAllotVO; + +import java.util.List; + +/** + * @description 用户GPU配置 Mapper + * @date 2021-9-2 + */ +public interface UserGpuConfigMapper extends BaseMapper { + + /** + * 批量添加用户GPU配置 + * + * @param userGpuConfigs 用户GPU配置实体集合 + */ + void insertBatchs(List userGpuConfigs); + + /** + * 根据userId查询用户GPU配置记录数 + * @param userId 用户id + * @return Integer 用户GPU配置记录数 + */ + @Select("select count(*) from user_gpu_config where user_id= #{userId}") + Integer selectCountByUserId(@Param("userId") Long userId); + + /** + * 统计GPU型号配额总量 + * + * @return GPU具体型号资源配额 + */ + @Select("select gpu_model gpuModel,sum(gpu_limit)allotTotal from user_gpu_config where deleted=0 group by gpu_model") + List selectGpuAllotSum(); + + /** + * GPU配额TOP10统计 + */ + @Select("SELECT u.username,gc.user_id,SUM(gc.gpu_limit)gpuLimit FROM user_gpu_config gc, user u WHERE gc.user_id=u.id AND gc.deleted=0" + + " GROUP BY gc.user_id ORDER BY gpuLimit DESC LIMIT 10") + List selectAllotTotal(); + + /** + * 查询某用户具体的GPU型号配额 + * + * @param userId 用户ID + * @return GPU型号配额 + */ + @Select("select gpu_model gpuModel,sum(gpu_limit)allotTotal from user_gpu_config where user_id=#{userId} and deleted=0 group by gpuModel") + List selectGpuModelTotal(@Param("userId") Long userId); + + /** + * 根据用户id查询GPU配额总量 + * + * @param userId 用户ID + * @return GPU配额总量 + */ + @Select("SELECT IFNULL(SUM(gpu_limit),0)gpuSum FROM user_gpu_config WHERE user_id=#{userId} AND deleted=0") + int selectGpuLimitSum(@Param("userId") Long userId); + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserMapper.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserMapper.java index cff32e5..fe75675 
100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserMapper.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserMapper.java @@ -110,6 +110,13 @@ public interface UserMapper extends BaseMapper { */ Set queryPermissionByUserId(Long userId); + /** + * 查找用户所在的用户组名 + * + * @param userId 用户id + * @return 用户组名 + */ + String queryUserGroupNameByUserId(Long userId); /** * 查询实体及关联对象 @@ -144,4 +151,13 @@ public interface UserMapper extends BaseMapper { @ResultMap(value = "userMapperResults") IPage selectCollPageByRoleId(Page page, @Param("ew") Wrapper queryWrapper, Long roleId); + /** + * 根据用户id查找用户名 + * + * @param userId 用户id + * @return 用户名 + */ + @Select("SELECT username FROM user WHERE id=#{userId} and enabled=1 AND deleted=0 ") + String findUserNameById(@Param("userId") Long userId); + } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/AuthCodeQueryDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/AuthCodeQueryDTO.java index b514437..d0e1786 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/AuthCodeQueryDTO.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/AuthCodeQueryDTO.java @@ -32,6 +32,6 @@ public class AuthCodeQueryDTO extends PageQueryBase implements Serializable { private static final long serialVersionUID = 1L; @ApiModelProperty(value = "权限组名称") - private String authCode; + private String keyword; } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceCreateDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceCreateDTO.java new file mode 100644 index 0000000..ca53420 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceCreateDTO.java @@ -0,0 +1,57 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.StringConstant; +import org.hibernate.validator.constraints.Length; + +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.Pattern; +import java.io.Serializable; + +/** + * @description GPU资源创建 + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +public class GpuResourceCreateDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "GPU类型(例如:NVIDIA)", required = true) + @NotBlank(message = "GPU类型") + @Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU类型错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_GPU_TYPE, message = "支持字母、数字、汉字、英文横杠、英文.号、空白字符和英文斜杠") + private String gpuType; + + @ApiModelProperty(value = "GPU型号(例如:v100)", required = true) + @NotBlank(message = "GPU型号") + @Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU型号错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_GPU_MODEL, message = "支持小写字母、数字、英文横杠、英文.号和英文斜杠") + private String gpuModel; + + @ApiModelProperty(value = "k8s GPU资源标签key值(例如:nvidia.com/gpu)", required = true) + @NotBlank(message = "k8s GPU资源标签key值") + 
@Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU型号错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_K8S, message = "支持小写字母、数字、英文横杠、英文.号和英文斜杠") + private String k8sLabelKey; +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceDeleteDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceDeleteDTO.java new file mode 100644 index 0000000..26f85c4 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceDeleteDTO.java @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; + +import javax.validation.constraints.NotNull; +import java.io.Serializable; +import java.util.Set; + +/** + * @description GPU资源删除 + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +public class GpuResourceDeleteDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "id", required = true) + @NotNull(message = "id不能为空") + private Set ids; +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceQueryDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceQueryDTO.java new file mode 100644 index 0000000..d5e8712 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceQueryDTO.java @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.StringConstant; +import org.dubhe.biz.db.base.PageQueryBase; +import org.hibernate.validator.constraints.Length; + +import javax.validation.constraints.Pattern; +import java.io.Serializable; + +/** + * @description GPU资源查询 + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +public class GpuResourceQueryDTO extends PageQueryBase implements Serializable { + + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "GPU类型(例如:NVIDIA)") + @Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU类型错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_GPU_TYPE, message = "支持字母、数字、汉字、英文横杠、英文.号、空白字符和英文斜杠") + private String gpuType; + +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceUpdateDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceUpdateDTO.java new file mode 100644 index 0000000..111c58d --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/GpuResourceUpdateDTO.java @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.StringConstant; +import org.hibernate.validator.constraints.Length; + +import javax.validation.constraints.Min; +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.NotNull; +import javax.validation.constraints.Pattern; +import java.io.Serializable; + +/** + * @description GPU资源修改 + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +public class GpuResourceUpdateDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "id", required = true) + @NotNull(message = "id不能为null") + @Min(value = MagicNumConstant.ONE, message = "id必须大于1") + private Long id; + + @ApiModelProperty(value = "GPU类型(例如:NVIDIA)", required = true) + @NotBlank(message = "GPU类型") + @Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU类型错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_GPU_TYPE, message = "支持字母、数字、汉字、英文横杠、英文.号、空白字符和英文斜杠") + private String gpuType; + + @ApiModelProperty(value = "GPU型号(例如:v100)", required = true) + @NotBlank(message = "GPU型号") + @Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU型号错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_GPU_MODEL, message = "支持小写字母、数字、英文横杠、英文.号、空白字符和英文斜杠") + private String gpuModel; + + @ApiModelProperty(value = "k8s GPU资源标签key值(例如:nvidia.com/gpu)", required = true) + @NotBlank(message = "k8s GPU资源标签key值") + @Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU型号错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_K8S, message = "支持小写字母、数字、英文横杠、英文.号和英文斜杠") + private String k8sLabelKey; +} \ No newline at end of file diff --git 
a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/MenuQueryDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/MenuQueryDTO.java index 109e5bd..0665748 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/MenuQueryDTO.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/MenuQueryDTO.java @@ -36,8 +36,9 @@ public class MenuQueryDTO { @Query(propName = "create_time", type = Query.Type.BETWEEN) private List createTime; - @Query(propName = "deleted", type = Query.Type.EQ) private Boolean deleted = false; + @Query(type = Query.Type.ORDER_BY) + private String sort = "sort"; } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/QueryUserResourceSpecsDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/QueryUserResourceSpecsDTO.java new file mode 100644 index 0000000..24e7783 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/QueryUserResourceSpecsDTO.java @@ -0,0 +1,65 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; + +import javax.validation.constraints.Max; +import javax.validation.constraints.Min; +import javax.validation.constraints.NotNull; +import java.io.Serializable; + +/** + * @description 查询用户资源规格 + * @date 2021-09-07 + */ +@Data +@Accessors(chain = true) +public class QueryUserResourceSpecsDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "用户id") + @Min(value = MagicNumConstant.ONE, message = "用户id,不能小于1") + private Long userId; + + @ApiModelProperty(value = "所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving,4:dubhe-tadl,5:dubhe-optimize))", required = true) + @NotNull(message = "所属业务场景不能为空") + @Min(value = MagicNumConstant.ZERO, message = "所属业务场景错误") + @Max(value = MagicNumConstant.FIVE, message = "所属业务场景错误") + private Integer module; + + @ApiModelProperty("规格类型(0为CPU, 1为GPU)") + @NotNull(message = "规格类型(0为CPU, 1为GPU)不能为空") + private Boolean resourcesPoolType; + + @ApiModelProperty(value = "节点个数") + @Min(value = MagicNumConstant.ONE, message = "节点个数,默认为1个") + private Integer resourcesPoolNode; + + @ApiModelProperty(value = "GPU型号(例如:v100)") + private String gpuModel; + + @ApiModelProperty(value = "k8s GPU资源标签key值(例如:nvidia.com/gpu)") + private String k8sLabelKey; + + @ApiModelProperty(value = "多GPU,true:GPU数大于1核,false:GPU数等于1核") + private Boolean multiGpu; +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsCreateDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsCreateDTO.java index 7c6f44e..1ed8c94 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsCreateDTO.java +++ 
b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsCreateDTO.java @@ -42,10 +42,10 @@ public class ResourceSpecsCreateDTO implements Serializable { @Pattern(regexp = StringConstant.REGEXP_SPECS, message = "规格名称支持字母、数字、汉字、英文横杠、下划线和空白字符") private String specsName; - @ApiModelProperty(value = "所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving)", required = true) + @ApiModelProperty(value = "所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving, 4:dubhe-tadl, 5:dubhe-optimize)", required = true) @NotNull(message = "所属业务场景不能为空") @Min(value = MagicNumConstant.ZERO, message = "所属业务场景错误") - @Max(value = MagicNumConstant.THREE, message = "所属业务场景错误") + @Max(value = MagicNumConstant.FIVE, message = "所属业务场景错误") private Integer module; @ApiModelProperty(value = "CPU数量,单位:核", required = true) diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsQueryDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsQueryDTO.java index ff7c247..4c33735 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsQueryDTO.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsQueryDTO.java @@ -37,6 +37,9 @@ public class ResourceSpecsQueryDTO extends PageQueryBase implements Serializable private static final long serialVersionUID = 1L; + @ApiModelProperty(value = "多GPU,true:GPU数大于1核,false:GPU数等于1核") + private Boolean multiGpu; + @ApiModelProperty("规格名称") @Length(max = MagicNumConstant.THIRTY_TWO, message = "规格名称错误") private String specsName; @@ -44,8 +47,8 @@ public class ResourceSpecsQueryDTO extends PageQueryBase implements Serializable @ApiModelProperty("规格类型(0为CPU, 1为GPU)") private Boolean resourcesPoolType; - @ApiModelProperty("所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving)") + @ApiModelProperty("所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving,4:dubhe-tadl,5:dubhe-optimize)") @Min(value = MagicNumConstant.ZERO, 
message = "所属业务场景错误") - @Max(value = MagicNumConstant.THREE, message = "所属业务场景错误") + @Max(value = MagicNumConstant.FIVE, message = "所属业务场景错误") private Integer module; } \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsUpdateDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsUpdateDTO.java index 3662bb1..b1de05a 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsUpdateDTO.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/ResourceSpecsUpdateDTO.java @@ -49,10 +49,10 @@ public class ResourceSpecsUpdateDTO implements Serializable { @Pattern(regexp = StringConstant.REGEXP_SPECS, message = "规格名称支持字母、数字、汉字、英文横杠、下划线和空白字符") private String specsName; - @ApiModelProperty(value = "所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving)", required = true) + @ApiModelProperty(value = "所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving, 4:dubhe-tadl, 5:dubhe-optimize)", required = true) @NotNull(message = "所属业务场景不能为空") @Min(value = MagicNumConstant.ZERO, message = "所属业务场景错误") - @Max(value = MagicNumConstant.THREE, message = "所属业务场景错误") + @Max(value = MagicNumConstant.FIVE, message = "所属业务场景错误") private Integer module; @ApiModelProperty(value = "CPU数量,单位:核") diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGpuResourceQueryDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGpuResourceQueryDTO.java new file mode 100644 index 0000000..8c1d7c9 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGpuResourceQueryDTO.java @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.StringConstant; +import org.hibernate.validator.constraints.Length; + +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.Pattern; +import java.io.Serializable; + +/** + * @description 用户GPU资源查询 + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +public class UserGpuResourceQueryDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "GPU类型(例如:NVIDIA)") + @NotBlank(message = "GPU类型") + @Length(max = MagicNumConstant.SIXTY_FOUR, message = "GPU类型错误-输入长度不能超过64个字符") + @Pattern(regexp = StringConstant.REGEXP_GPU_TYPE, message = "支持字母、数字、汉字、英文横杠、英文.号、空白字符和英文斜杠") + private String gpuType; + +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGroupConfigSaveDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGroupConfigSaveDTO.java new file mode 100644 index 0000000..4800167 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserGroupConfigSaveDTO.java @@ -0,0 +1,35 @@ +package org.dubhe.admin.domain.dto; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.dto.UserGpuConfigDTO; + +import 
javax.validation.constraints.NotNull; +import java.util.List; + +/** + * @date 2021-11-24 + * @description 用户组用户统一配置DTO + */ +@Data +@Accessors(chain = true) +public class UserGroupConfigSaveDTO { + + private static final long serialVersionUID = 1L; + + @NotNull(message = "用户组ID 不能为空") + private Long groupId; + + @NotNull(message = "Notebook 延迟删除时间配置不能为空") + private Integer notebookDelayDeleteTime; + + @NotNull(message = "CPU 资源限制配置不能为空") + private Integer cpuLimit; + + @NotNull(message = "内存资源限制配置不能为空") + private Integer memoryLimit; + + private Long defaultImageId; + + private List gpuResources; +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceListDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceListDTO.java new file mode 100644 index 0000000..3c4f85f --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceListDTO.java @@ -0,0 +1,31 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import org.dubhe.biz.db.base.PageQueryBase; + +import java.io.Serializable; + +/** + * @description 用户资源列表 + * @date 2021-11-25 + */ +public class UserResourceListDTO extends PageQueryBase implements Serializable { + + private static final long serialVersionUID = 1L; + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceQueryDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceQueryDTO.java new file mode 100644 index 0000000..82a719b --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserResourceQueryDTO.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; + +import javax.validation.constraints.NotNull; + +/** + * @description 用户资源统计DTO + * @date 2021-11-17 + */ +@Data +public class UserResourceQueryDTO { + + @ApiModelProperty("资源统计类型") + @NotNull + private Integer statType; + + @ApiModelProperty("资源类型") + @NotNull + private Integer resourceType; + + @ApiModelProperty("统计时间段") + private String sumDay; +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/GpuResource.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/GpuResource.java new file mode 100644 index 0000000..348a74f --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/GpuResource.java @@ -0,0 +1,63 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.db.entity.BaseEntity; + +import javax.validation.constraints.NotNull; + +/** + * @description GPU资源实体类 + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +@TableName("gpu_resource") +public class GpuResource extends BaseEntity { + + /** + * 主键ID + */ + @TableId(value = "id", type = IdType.AUTO) + @NotNull(groups = {Update.class}) + private Long id; + + /** + * GPU类型(例如:NVIDIA) + */ + @TableField(value = "gpu_type") + private String gpuType; + + /** + * GPU型号(例如:v100) + */ + @TableField(value = "gpu_model") + private String gpuModel; + + /** + * k8s GPU资源标签key值(例如:nvidia.com/gpu) + */ + @TableField(value = "k8s_label_key") + private String k8sLabelKey; + +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java index 5513718..c0cd7bb 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java @@ -39,6 +39,12 @@ public class UserConfig extends BaseEntity { @TableId(value = "user_id") private Long userId; + @TableField(exist = false) + private String userName; + + @TableField(exist = false) + private String nickName; + @TableId(value = "notebook_delay_delete_time") private Integer notebookDelayDeleteTime; @@ -48,6 +54,6 @@ public class UserConfig extends BaseEntity { @TableId(value = "memory_limit") private Integer memoryLimit; - @TableId(value = "gpu_limit") - private Integer gpuLimit; + 
@TableField(value = "default_image_id") /* plain column: user_id is already this entity's @TableId, so a non-key column uses @TableField */ + private Long defaultImageId; } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserGpuConfig.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserGpuConfig.java new file mode 100644 index 0000000..cc95439 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserGpuConfig.java @@ -0,0 +1,80 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.admin.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.db.entity.BaseEntity; + +import javax.validation.constraints.NotNull; + +/** + * @description User GPU configuration entity, mapped to table user_gpu_config; id is the only @TableId, every other persisted column uses @TableField + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +@TableName("user_gpu_config") +public class UserGpuConfig extends BaseEntity { + + /** + * Primary key ID (auto-increment; must be non-null in the Update validation group) + */ + @TableId(value = "id", type = IdType.AUTO) + @NotNull(groups = {Update.class}) + private Long id; + + /** + * User id (plain column, not a primary key) + */ + @TableField(value = "user_id") + private Long userId; + + /** + * User name (not a column of this table: exist = false) + */ + @TableField(exist = false) + private String userName; + + /** + * GPU type (e.g. NVIDIA) + */ + @TableField(value = "gpu_type") + private String gpuType; + + /** + * GPU model (e.g. v100) + */ + @TableField(value = "gpu_model") + private String gpuModel; + + /** + * k8s GPU resource label key (e.g. nvidia.com/gpu) + */ + @TableField(value = "k8s_label_key") + private String k8sLabelKey; + + /** + * Per-user GPU resource limit, unit: cards (plain column, not a primary key) + */ + @TableField(value = "gpu_limit") + private Integer gpuLimit; +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/GpuResourceQueryVO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/GpuResourceQueryVO.java new file mode 100644 index 0000000..877cf6f --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/GpuResourceQueryVO.java @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.domain.vo; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; + +import java.io.Serializable; +import java.sql.Timestamp; + +/** + * @description Serializable result wrapper for GPU resource queries (GPU type/model, k8s label key and create/update audit fields) + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +public class GpuResourceQueryVO implements Serializable { + + private static final long serialVersionUID = 1L; + + @ApiModelProperty("主键ID") + private Long id; + + @ApiModelProperty("GPU类型(例如:NVIDIA)") + private String gpuType; + + @ApiModelProperty("GPU型号(例如:v100)") + private String gpuModel; + + @ApiModelProperty("k8s GPU资源标签key值(例如:nvidia.com/gpu)") + private String k8sLabelKey; + + @ApiModelProperty("创建人") + private Long createUserId; + + @ApiModelProperty("创建时间") + private Timestamp createTime; + + @ApiModelProperty("更新人") + private Long updateUserId; + + @ApiModelProperty("更新时间") + private Timestamp updateTime; +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/MenuVo.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/MenuVo.java index 736b0d6..fa15916 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/MenuVo.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/MenuVo.java @@ -20,13 +20,14 @@ import lombok.Data; import java.io.Serializable; import java.util.List; +import java.util.Map; /** * @description 菜单VO * @date 2020-06-01 */ @Data
-@JsonInclude(JsonInclude.Include.NON_EMPTY) +@JsonInclude(JsonInclude.Include.NON_NULL) public class MenuVo implements Serializable { private static final long serialVersionUID = 7145999097655311261L; @@ -38,7 +39,7 @@ public class MenuVo implements Serializable { private String component; - private MenuMetaVo meta; + private Map meta; private List children; } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserLimitConfigVO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserLimitConfigVO.java new file mode 100644 index 0000000..c21f427 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserLimitConfigVO.java @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.vo; + +import lombok.Data; + +/** + * @description VO holding a user's identity (userId, userName, nickName) together with gpu, cpu and mem values as strings; presumably the configured resource limits, judging by the class name - TODO confirm against the mapper that fills it + * @date 2021-11-26 + */ +@Data +public class UserLimitConfigVO { + + private Long userId; + + private String userName; + + private String nickName; + + private String gpu; + + private String cpu; + + private String mem; +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserResourceResVO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserResourceResVO.java new file mode 100644 index 0000000..9b77e2a --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserResourceResVO.java @@ -0,0 +1,91 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.admin.domain.vo; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.vo.GpuAllotVO; + +import java.util.List; + +/** + * @description VO for one row of the paginated user resource list: quota plus 7-day and 15-day peak usage rate / peak usage amount for GPU, memory and CPU (all carried as strings); gpuModelAllots is typed as List of GpuAllotVO instead of a raw List + * @date 2021-11-23 + */ +@Data +@Accessors(chain = true) +public class UserResourceResVO { + + @ApiModelProperty("编号") + private Long id; + + @ApiModelProperty("用户名") + private String userName; + + @ApiModelProperty("账户名(昵称)") + private String nickName; + + @ApiModelProperty("GPU配额") + private String gpu; + + @ApiModelProperty("GPU具体型号配额") + private List<GpuAllotVO> gpuModelAllots; + + @ApiModelProperty("7天内GPU峰值使用率") + private String gpu7; + + @ApiModelProperty("7天内GPU峰值使用量") + private String gpu7unit; + + @ApiModelProperty("15天内GPU峰值使用率") + private String gpu15; + + @ApiModelProperty("15天内GPU峰值使用量") + private String gpu15unit; + + @ApiModelProperty("内存配额") + private String mem; + + @ApiModelProperty("7天内内存峰值使用率") + private String mem7; + + @ApiModelProperty("7天内内存峰值使用量") + private String mem7unit; + + @ApiModelProperty("15天内内存峰值使用率") + private String mem15; + + @ApiModelProperty("15天内内存峰值使用量") + private String mem15unit; + + @ApiModelProperty("CPU配额") + private String cpu; + + @ApiModelProperty("7天内CPU峰值使用率") + private String cpu7; + + @ApiModelProperty("7天内CPU峰值使用量") + private String cpu7unit; + + @ApiModelProperty("15天内CPU峰值使用率") + private String cpu15; + + @ApiModelProperty("15天内CPU峰值使用量") + private String cpu15unit; + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/enums/ResourceTypeEnum.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/enums/ResourceTypeEnum.java new file mode 100644 index 0000000..cd13495 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/enums/ResourceTypeEnum.java @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.enums; + +/** + * @description Resource type enum (gpu = 1, cpu = 2, memory = 3); code and desc are immutable, toString renders "[code]desc", isGpuType is a null-safe check against the GPU code + * @date 2021-11-17 + */ +public enum ResourceTypeEnum { + GPU_TYPE(1, "gpu"), + CPU_TYPE(2, "cpu"), + MEMORY_TYPE(3, "memory"); + + + private final Integer code; + + private final String desc; + + ResourceTypeEnum(Integer code, String desc) { + this.code = code; + this.desc = desc; + } + + public Integer getCode() { + return code; + } + + public String getDesc() { + return desc; + } + + @Override + public String toString() { + return "[" + this.code + "]" + this.desc; + } + + public static boolean isGpuType(Integer code) { + return GPU_TYPE.code.equals(code); + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/enums/StatTypeEnum.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/enums/StatTypeEnum.java new file mode 100644 index 0000000..9232291 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/enums/StatTypeEnum.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.enums; + +/** + * @description Resource statistics type (1 = quota, 2 = peak usage rate, 3 = peak usage amount); code and desc are immutable, toString renders "[code]desc" + * @date 2021-11-17 + */ +public enum StatTypeEnum { + ALLOT_TYPE(1, "统计资源配额"), + USAGE_RATE_TYPE(2, "统计资源使用率峰值"), + USAGE_TYPE(3, "统计资源用量峰值"); + + + private final Integer code; + + private final String desc; + + StatTypeEnum(Integer code, String desc) { + this.code = code; + this.desc = desc; + } + + public Integer getCode() { + return code; + } + + public String getDesc() { + return desc; + } + + @Override + public String toString() { + return "[" + this.code + "]" + this.desc; + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventListener.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventListener.java index e206e77..88ea8d2 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventListener.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventListener.java @@ -42,7 +42,7 @@ public class EmailEventListener { @EventListener - @Async("taskExecutor") + @Async public void onApplicationEvent(EmailEvent event) { EmailDTO emailDTO = (EmailDTO) event.getSource(); sendMail(emailDTO.getReceiverMailAddress(), emailDTO.getSubject(), emailDTO.getCode()); diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventPublisher.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventPublisher.java index 9e69028..cd789f2 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventPublisher.java +++
b/dubhe-server/admin/src/main/java/org/dubhe/admin/event/EmailEventPublisher.java @@ -41,7 +41,7 @@ public class EmailEventPublisher { * * @param dto */ - @Async("taskExecutor") + @Async public void sentEmailEvent(final EmailDTO dto) { try { EmailEvent emailEvent = new EmailEvent(dto); diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/GpuResourceController.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/GpuResourceController.java new file mode 100644 index 0000000..7a41bec --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/GpuResourceController.java @@ -0,0 +1,89 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.rest; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import org.dubhe.admin.domain.dto.*; +import org.dubhe.admin.service.GpuResourceService; +import org.dubhe.biz.base.constant.Permissions; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.web.bind.annotation.*; + +import javax.validation.Valid; + +/** + * @description GPU resource management REST controller under /gpuResource: the GET endpoints delegate queries to GpuResourceService; create, update and delete take a validated request body and are guarded by @PreAuthorize + * @date 2021-08-20 + */ +@Api(tags = "系统:GPU资源管理") +@RestController +@RequestMapping("/gpuResource") +public class GpuResourceController { + + @Autowired + private GpuResourceService gpuResourceService; + + @ApiOperation("查询GPU资源") + @GetMapping + public DataResponseBody getGpuResource(GpuResourceQueryDTO gpuResourceQueryDTO) { + return new DataResponseBody(gpuResourceService.getGpuResource(gpuResourceQueryDTO)); + } + + @ApiOperation("查询用户GPU类型") + @GetMapping("/getUserGpuType") + public DataResponseBody getUserGpuType() { + return new DataResponseBody(gpuResourceService.getUserGpuType()); + } + + @ApiOperation("根据用户GPU类型查询用户GPU资源") + @GetMapping("/getUserGpuModel") + public DataResponseBody getUserGpuResource(UserGpuResourceQueryDTO userGpuResourceQueryDTO) { + return new DataResponseBody(gpuResourceService.getUserGpuResource(userGpuResourceQueryDTO)); + } + + @ApiOperation("查询GPU类型") + @GetMapping("/getGpuType") + public DataResponseBody getGpuType() { + return new DataResponseBody(gpuResourceService.getGpuType()); + } + + @ApiOperation("新增GPU资源") + @PostMapping + @PreAuthorize(Permissions.GPU_CREATE) + public DataResponseBody create(@Valid @RequestBody GpuResourceCreateDTO gpuResourceCreateDTO) { + return new DataResponseBody(gpuResourceService.create(gpuResourceCreateDTO)); + } + + @ApiOperation("修改GPU资源") + @PutMapping +
@PreAuthorize(Permissions.GPU_EDIT) + public DataResponseBody update(@Valid @RequestBody GpuResourceUpdateDTO gpuResourceUpdateDTO) { + return new DataResponseBody(gpuResourceService.update(gpuResourceUpdateDTO)); + } + + @ApiOperation("删除GPU资源") + @DeleteMapping + @PreAuthorize(Permissions.GPU_DELETE) + public DataResponseBody delete(@Valid @RequestBody GpuResourceDeleteDTO gpuResourceDeleteDTO) { + gpuResourceService.delete(gpuResourceDeleteDTO); + return new DataResponseBody(); + } + +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/ResourceSpecsController.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/ResourceSpecsController.java index b445977..31ed63a 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/ResourceSpecsController.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/ResourceSpecsController.java @@ -25,10 +25,12 @@ import org.dubhe.admin.domain.dto.ResourceSpecsUpdateDTO; import org.dubhe.admin.service.ResourceSpecsService; import org.dubhe.biz.base.constant.Permissions; import org.dubhe.biz.base.dto.QueryResourceSpecsDTO; +import org.dubhe.admin.domain.dto.QueryUserResourceSpecsDTO; import org.dubhe.biz.base.vo.DataResponseBody; import org.dubhe.biz.base.vo.QueryResourceSpecsVO; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.validation.annotation.Validated; import org.springframework.web.bind.annotation.*; import javax.validation.Valid; @@ -47,16 +49,30 @@ public class ResourceSpecsController { @ApiOperation("查询资源规格") @GetMapping - public DataResponseBody getResourceSpecs(ResourceSpecsQueryDTO resourceSpecsQueryDTO) { + public DataResponseBody getResourceSpecs(@Validated ResourceSpecsQueryDTO resourceSpecsQueryDTO) { return new DataResponseBody(resourceSpecsService.getResourceSpecs(resourceSpecsQueryDTO)); } - @ApiOperation("查询资源规格(远程调用)") + +
@ApiOperation("查询资源规格(训练远程调用)") @GetMapping("/queryResourceSpecs") - public DataResponseBody queryResourceSpecs(QueryResourceSpecsDTO queryResourceSpecsDTO) { + public DataResponseBody queryResourceSpecs(@Validated QueryResourceSpecsDTO queryResourceSpecsDTO) { return new DataResponseBody(resourceSpecsService.queryResourceSpecs(queryResourceSpecsDTO)); } + @ApiOperation("查询用户资源规格") + @GetMapping("/queryUserResourceSpecs") + public DataResponseBody getUserResourceSpecs(@Validated QueryUserResourceSpecsDTO queryUserResourceSpecsDTO) { + return new DataResponseBody(resourceSpecsService.getUserResourceSpecs(queryUserResourceSpecsDTO)); + } + + + @ApiOperation("查询资源规格(tadl远程调用)") + @GetMapping("/queryTadlResourceSpecs") + public DataResponseBody queryTadlResourceSpecs(Long id) { + return new DataResponseBody(resourceSpecsService.queryTadlResourceSpecs(id)); + } + @ApiOperation("新增资源规格") @PostMapping @PreAuthorize(Permissions.SPECS_CREATE) diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserCenterController.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserCenterController.java index 2188eae..ccf6501 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserCenterController.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserCenterController.java @@ -78,7 +78,6 @@ public class UserCenterController { List roles = roleService.getRoleByUserId(curUserId); List menuDtoList = menuService.findByRoles(roles); List menuDtos = (List) menuService.buildTree(menuDtoList).get("result"); - return new DataResponseBody(menuService.buildMenus(menuDtos)); } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java index 576db7e..fd8ac0f 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java @@ -19,7 +19,6 @@ 
package org.dubhe.admin.rest; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; -import org.dubhe.admin.domain.dto.UserConfigDTO; import org.dubhe.admin.domain.dto.UserCreateDTO; import org.dubhe.admin.domain.dto.UserDeleteDTO; import org.dubhe.admin.domain.dto.UserQueryDTO; @@ -27,12 +26,23 @@ import org.dubhe.admin.domain.dto.UserUpdateDTO; import org.dubhe.admin.service.UserService; import org.dubhe.biz.base.constant.Permissions; import org.dubhe.biz.base.context.UserContext; +import org.dubhe.biz.base.dto.UserConfigSaveDTO; import org.dubhe.biz.base.dto.UserDTO; import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserConfigVO; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.validation.annotation.Validated; -import org.springframework.web.bind.annotation.*; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.PutMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestHeader; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; import javax.servlet.http.HttpServletResponse; import javax.validation.Valid; @@ -81,22 +91,23 @@ public class UserController { @ApiOperation("删除用户") @DeleteMapping @PreAuthorize(Permissions.USER_DELETE) - public DataResponseBody delete(@Valid @RequestBody UserDeleteDTO userDeleteDTO) { - userService.delete(userDeleteDTO.getIds()); + public DataResponseBody delete(@Valid 
@RequestBody UserDeleteDTO userDeleteDTO, @RequestHeader("Authorization") String accessToken) { + userService.delete(userDeleteDTO.getIds(), accessToken); return new DataResponseBody(); } @ApiOperation("根据用户ID查询用户配置") @GetMapping(value = "/getUserConfig") - public DataResponseBody getUserConfig(@RequestParam(value = "userId") Long userId) { + public DataResponseBody getUserConfig(@RequestParam(value = "userId") Long userId) { return new DataResponseBody(userService.findUserConfig(userId)); } @ApiOperation("新增或修改用户配置") @PutMapping(value = "/setUserConfig") @PreAuthorize(Permissions.USER_CONFIG_EDIT) - public DataResponseBody setUserConfig(@Validated @RequestBody UserConfigDTO userConfigDTO) { - return new DataResponseBody(userService.createOrUpdateUserConfig(userConfigDTO)); + public DataResponseBody setUserConfig(@Validated @RequestBody UserConfigSaveDTO userConfigSaveDTO) { + userService.saveUserConfig(userConfigSaveDTO, null); + return new DataResponseBody(); } /** @@ -121,7 +132,7 @@ public class UserController { @ApiOperation("根据用户昵称搜索用户列表") @GetMapping(value = "/findByNickName") - public DataResponseBody> findByNickName(@RequestParam(value = "nickName",required = false) String nickName) { + public DataResponseBody> findByNickName(@RequestParam(value = "nickName", required = false) String nickName) { return new DataResponseBody(userService.findByNickName(nickName)); } @@ -130,4 +141,20 @@ public class UserController { public DataResponseBody> getUserList(@RequestParam(value = "ids") List ids) { return new DataResponseBody(userService.getUserList(ids)); } + + @ApiOperation("重置密码") + @PostMapping(value = "/resetPassword/{userId}") + @PreAuthorize(Permissions.USER_RESET_PASSWORD) + public DataResponseBody resetPassword(@PathVariable Long userId) { + return userService.resetPassword(userId); + } + + + @ApiOperation("获取用户资源配额总量") + @GetMapping("/userAllot") + @PreAuthorize(Permissions.SYSTEM_NODE) + public DataResponseBody getUserAllotTotal() { + return 
userService.getAllotResources(); + } + } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserGroupController.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserGroupController.java index 65d2759..2acc1bc 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserGroupController.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserGroupController.java @@ -18,6 +18,7 @@ package org.dubhe.admin.rest; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; +import org.dubhe.admin.domain.dto.UserGroupConfigSaveDTO; import org.dubhe.admin.domain.dto.UserGroupDTO; import org.dubhe.admin.domain.dto.UserGroupDeleteDTO; import org.dubhe.admin.domain.dto.UserGroupQueryDTO; @@ -32,9 +33,11 @@ import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.validation.annotation.Validated; import org.springframework.web.bind.annotation.DeleteMapping; import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.PutMapping; import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestHeader; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; @@ -118,8 +121,8 @@ public class UserGroupController { @DeleteMapping("/delete") @ApiOperation("批量删除组用户") @PreAuthorize(Permissions.USER_GROUP_DELETE_USER) - public DataResponseBody delUser(@Validated @RequestBody UserGroupUpdDTO userGroupUpdDTO) { - userGroupService.delUser(userGroupUpdDTO); + public DataResponseBody delUser(@Validated @RequestBody UserGroupUpdDTO userGroupUpdDTO, @RequestHeader("Authorization") String accessToken) { + userGroupService.delUser(userGroupUpdDTO, accessToken); return new DataResponseBody(); } @@ -130,4 +133,21 
@@ public class UserGroupController { userGroupService.updateUserRole(userRoleUpdateDTO); return new DataResponseBody(); } + + @PutMapping("/resetPassword/{groupId}") + @ApiOperation("批量重置组成员密码") + @PreAuthorize(Permissions.USER_GROUP_RESET_USER_PASSWORD) + public DataResponseBody resetUserPassword(@PathVariable Long groupId) { + userGroupService.resetUserPassword(groupId); + return new DataResponseBody(); + } + + @ApiOperation("批量新增或修改组成员配置") + @PutMapping(value = "/setUserConfig") + @PreAuthorize(Permissions.USER_GROUP_CONFIG_EDIT) + public DataResponseBody setUserConfig(@Validated @RequestBody UserGroupConfigSaveDTO userGroupConfigSaveDTO) { + userGroupService.saveUserConfig(userGroupConfigSaveDTO); + return new DataResponseBody(); + } + } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserResourceController.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserResourceController.java new file mode 100644 index 0000000..00f765f --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserResourceController.java @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.rest; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import org.dubhe.admin.domain.dto.UserResourceListDTO; +import org.dubhe.admin.domain.dto.UserResourceQueryDTO; +import org.dubhe.admin.service.UserResourceService; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserAllotVO; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; + +/** + * @description REST controller exposing user resource statistics + * @date 2021-11-23 + */ +@Api(tags = "控制台:用户统计") +@RestController +@RequestMapping("/resource") +public class UserResourceController { + + @Autowired + private UserResourceService userResourceService; + + + @ApiOperation("用户Top统计") + @GetMapping("/total") + public DataResponseBody> getUserResourceTotal(@Validated UserResourceQueryDTO resourceQueryDTO) { + return new DataResponseBody(userResourceService.getResourceTotal(resourceQueryDTO)); + } + + @ApiOperation("用户资源统计列表") + @GetMapping("/list") + public DataResponseBody getUserResourceList(UserResourceListDTO resourceListDTO) { + return new DataResponseBody(userResourceService.getResourceList(resourceListDTO)); + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/GpuResourceService.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/GpuResourceService.java new file mode 100644 index 0000000..ca6bda2 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/GpuResourceService.java @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.service; + +import org.dubhe.admin.domain.dto.*; +import org.dubhe.admin.domain.entity.GpuResource; +import org.dubhe.admin.domain.vo.GpuResourceQueryVO; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * @description GPU资源管理 + * @date 2021-08-20 + */ +public interface GpuResourceService { + + /** + * 查询GPU资源 + * @param gpuResourceQueryDTO 查询GPU资源请求实体 + * @return List gpuResourceSpecs GPU资源列表 + */ + Map getGpuResource(GpuResourceQueryDTO gpuResourceQueryDTO); + + /** + * 新增GPU资源 + * @param gpuResourceCreateDTO 新增GPU资源实体 + * @return List 新增GPU资源id + */ + List create(GpuResourceCreateDTO gpuResourceCreateDTO); + + /** + * 修改GPU资源 + * @param gpuResourceUpdateDTO 修改GPU资源实体 + * @return List 修改GPU资源id + */ + List update(GpuResourceUpdateDTO gpuResourceUpdateDTO); + + /** + * GPU资源删除 + * @param gpuResourceDeleteDTO GPU资源删除id集合 + */ + void delete(GpuResourceDeleteDTO gpuResourceDeleteDTO); + + /** + * 查询GPU类型 + * @return List GPU类型列表 + */ + List getGpuType(); + + /** + * 查询用户GPU类型 + * @return Set GPU类型列表 + */ + Set getUserGpuType(); + + /** + * 根据用户GPU类型查询用户GPU资源 + * @return List 用户GPU资源列表 + */ + List getUserGpuResource(UserGpuResourceQueryDTO userGpuResourceQueryDTO); +} \ No newline at end of file diff --git 
a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/ResourceSpecsService.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/ResourceSpecsService.java index 998d387..e4038db 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/ResourceSpecsService.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/ResourceSpecsService.java @@ -16,9 +16,14 @@ */ package org.dubhe.admin.service; -import org.dubhe.admin.domain.dto.*; -import org.dubhe.biz.base.vo.QueryResourceSpecsVO; +import org.dubhe.admin.domain.dto.ResourceSpecsCreateDTO; +import org.dubhe.admin.domain.dto.ResourceSpecsDeleteDTO; +import org.dubhe.admin.domain.dto.ResourceSpecsQueryDTO; +import org.dubhe.admin.domain.dto.ResourceSpecsUpdateDTO; +import org.dubhe.biz.base.vo.QueryUserResourceSpecsVO; import org.dubhe.biz.base.dto.QueryResourceSpecsDTO; +import org.dubhe.admin.domain.dto.QueryUserResourceSpecsDTO; +import org.dubhe.biz.base.vo.QueryResourceSpecsVO; import java.util.List; import java.util.Map; @@ -62,4 +67,18 @@ public interface ResourceSpecsService { * @return QueryResourceSpecsVO 资源规格返回结果实体类 */ QueryResourceSpecsVO queryResourceSpecs(QueryResourceSpecsDTO queryResourceSpecsDTO); + + /** + * 查询用户资源规格 + * @param queryUserResourceSpecsDTO 查询用户资源规格请求实体 + * @return List 用户资源规格返回结果实体类集合 + */ + List getUserResourceSpecs(QueryUserResourceSpecsDTO queryUserResourceSpecsDTO); + + /** + * 查询资源规格 + * @param id 资源规格id + * @return QueryResourceSpecsVO 资源规格返回结果实体类 + */ + QueryResourceSpecsVO queryTadlResourceSpecs(Long id); } \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserGroupService.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserGroupService.java index b89f466..266675d 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserGroupService.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserGroupService.java @@ -101,7 +101,7 @@ public interface 
UserGroupService { * * @param userGroupUpdDTO 批量删除用户组用户DTO */ - void delUser(UserGroupUpdDTO userGroupUpdDTO); + void delUser(UserGroupUpdDTO userGroupUpdDTO, String accessToken); /** * 批量修改用户组用户的角色 @@ -109,4 +109,19 @@ public interface UserGroupService { * @param userRoleUpdateDTO */ void updateUserRole(UserRoleUpdateDTO userRoleUpdateDTO); + + + /** + * 批量重置用户组用户的密码 + * + * @param groupId + */ + void resetUserPassword(Long groupId); + + /** + * 批量修改用户组用户的设置 + * + * @param userGroupConfigSaveDTO + */ + void saveUserConfig(UserGroupConfigSaveDTO userGroupConfigSaveDTO); } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserResourceService.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserResourceService.java new file mode 100644 index 0000000..f0fa38f --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserResourceService.java @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.service; + +import org.dubhe.admin.domain.dto.UserResourceListDTO; +import org.dubhe.admin.domain.dto.UserResourceQueryDTO; +import org.dubhe.admin.domain.vo.UserResourceResVO; +import org.dubhe.biz.base.vo.UserAllotVO; + +import java.util.List; +import java.util.Map; + +/** + * @description 用户资源统计接口层 + * @date 2021-11-23 + */ +public interface UserResourceService { + + /** + * 用户资源统计 + * + * @param resourceQueryDTO 查询DTO实体 + * @return List 用户资源Top数据 + */ + List getResourceTotal(UserResourceQueryDTO resourceQueryDTO); + + /** + * 用户资源统计列表 + * + * @return List 用户资源列表VO实体 + */ + Map getResourceList(UserResourceListDTO resourceListDTO); + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java index c7589cc..e19d18c 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java @@ -18,13 +18,21 @@ package org.dubhe.admin.service; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.service.IService; -import org.dubhe.admin.domain.dto.*; +import org.dubhe.admin.domain.dto.AuthUserDTO; +import org.dubhe.admin.domain.dto.UserCenterUpdateDTO; +import org.dubhe.admin.domain.dto.UserCreateDTO; +import org.dubhe.admin.domain.dto.UserEmailUpdateDTO; +import org.dubhe.admin.domain.dto.UserQueryDTO; +import org.dubhe.admin.domain.dto.UserRegisterDTO; +import org.dubhe.admin.domain.dto.UserRegisterMailDTO; +import org.dubhe.admin.domain.dto.UserResetPasswordDTO; +import org.dubhe.admin.domain.dto.UserUpdateDTO; import org.dubhe.admin.domain.entity.User; -import org.dubhe.admin.domain.vo.UserConfigCreateVO; -import org.dubhe.admin.domain.vo.UserConfigVO; import org.dubhe.biz.base.dto.TeamDTO; +import 
org.dubhe.biz.base.dto.UserConfigSaveDTO; import org.dubhe.biz.base.dto.UserDTO; import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserConfigVO; import org.dubhe.cloud.authconfig.service.AdminUserService; import javax.servlet.http.HttpServletResponse; @@ -69,7 +77,7 @@ public interface UserService extends AdminUserService, IService { * * @param ids 用户ID列表 */ - void delete(Set ids); + void delete(Set ids, String accessToken); /** * 根据用户名称获取用户信息 @@ -235,8 +243,21 @@ public interface UserService extends AdminUserService, IService { /** * 创建或更新用户配置 * - * @param userConfigDTO 用户配置 - * @return org.dubhe.admin.domain.vo.UserConfigCreateVO 用户配置 VO + * @param userConfigSaveDTO 用户配置 */ - UserConfigCreateVO createOrUpdateUserConfig(UserConfigDTO userConfigDTO); + void saveUserConfig(UserConfigSaveDTO userConfigSaveDTO, String token); + + /** + * 重置密码 + * + * @return 重置密码结果集 + */ + DataResponseBody resetPassword(Long userId); + + /** + * 获取用户分配的资源总量 + * + * @return 资源配额总量统计 + */ + DataResponseBody getAllotResources(); } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/AuthCodeServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/AuthCodeServiceImpl.java index 758d57b..a79147f 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/AuthCodeServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/AuthCodeServiceImpl.java @@ -79,8 +79,8 @@ public class AuthCodeServiceImpl extends ServiceImpl imple Page page = authCodeQueryDTO.toPage(); QueryWrapper queryWrapper = new QueryWrapper<>(); - if (StringUtils.isNotEmpty(authCodeQueryDTO.getAuthCode())) { - queryWrapper.and(x -> x.eq("id", authCodeQueryDTO.getAuthCode()).or().like("authCOde", authCodeQueryDTO.getAuthCode())); + if (StringUtils.isNotEmpty(authCodeQueryDTO.getKeyword())) { + queryWrapper.and(x -> x.eq("id", authCodeQueryDTO.getKeyword()).or().like("authCOde", authCodeQueryDTO.getKeyword())); } //排序 
diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/GpuResourceServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/GpuResourceServiceImpl.java new file mode 100644 index 0000000..5dd3b2f --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/GpuResourceServiceImpl.java @@ -0,0 +1,223 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.service.impl; + +import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import org.apache.commons.collections4.CollectionUtils; +import org.dubhe.admin.dao.GpuResourceMapper; +import org.dubhe.admin.domain.dto.*; +import org.dubhe.admin.domain.entity.GpuResource; +import org.dubhe.admin.domain.vo.GpuResourceQueryVO; +import org.dubhe.admin.service.GpuResourceService; +import org.dubhe.admin.service.UserService; +import org.dubhe.biz.base.constant.StringConstant; +import org.dubhe.biz.base.context.UserContext; +import org.dubhe.biz.base.exception.BusinessException; +import org.dubhe.biz.base.service.UserContextService; +import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.base.vo.UserConfigVO; +import org.dubhe.biz.base.vo.UserGpuConfigVO; +import org.dubhe.biz.db.utils.PageUtil; +import 
org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.springframework.beans.BeanUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.util.*; +import java.util.stream.Collectors; + +/** + * @description GPU资源管理 + * @date 2021-08-20 + */ +@Service +public class GpuResourceServiceImpl implements GpuResourceService { + + @Autowired + private GpuResourceMapper gpuResourceMapper; + + @Autowired + private UserContextService userContextService; + + @Autowired + private UserService userService; + + /** + * 查询GPU资源 + * @param gpuResourceQueryDTO 查询GPU资源请求实体 + * @return List gpuResourceSpecs GPU资源列表 + */ + @Override + public Map getGpuResource(GpuResourceQueryDTO gpuResourceQueryDTO) { + Page page = gpuResourceQueryDTO.toPage(); + //排序字段 + String sort = null == gpuResourceQueryDTO.getSort() ? StringConstant.ID : gpuResourceQueryDTO.getSort(); + QueryWrapper queryResourceWrapper = new QueryWrapper<>(); + queryResourceWrapper.eq(gpuResourceQueryDTO.getGpuType() != null, "gpu_type", gpuResourceQueryDTO.getGpuType()); + if (StringConstant.SORT_ASC.equals(gpuResourceQueryDTO.getOrder())) { + queryResourceWrapper.orderByAsc(StringUtils.humpToLine(sort)); + } else { + queryResourceWrapper.orderByDesc(StringUtils.humpToLine(sort)); + } + Page pageGpuResourceResult = gpuResourceMapper.selectPage(page, queryResourceWrapper); + //结果集处理 + //查询结果数 + page.setTotal(pageGpuResourceResult.getTotal()); + List gpuResource = pageGpuResourceResult.getRecords(); + List gpuResourceQueryVOS = new ArrayList<>(); + if (CollectionUtils.isNotEmpty(gpuResource)) { + gpuResourceQueryVOS = gpuResource.stream().map(x -> { + GpuResourceQueryVO gpuResourceQueryVO = new GpuResourceQueryVO(); + BeanUtils.copyProperties(x, gpuResourceQueryVO); + return gpuResourceQueryVO; + }).collect(Collectors.toList()); + } + return 
PageUtil.toPage(page, gpuResourceQueryVOS); + } + + /** + * 新增GPU资源 + * @param gpuResourceCreateDTO 新增GPU资源实体 + * @return List 新增GPU资源id + */ + @Override + @Transactional(rollbackFor = Exception.class) + public List create(GpuResourceCreateDTO gpuResourceCreateDTO) { + UserContext curUser = userContextService.getCurUser(); + //GPU资源校验 + QueryWrapper resourceWrapper = new QueryWrapper<>(); + resourceWrapper.eq("gpu_type", gpuResourceCreateDTO.getGpuType()) + .eq("gpu_model", gpuResourceCreateDTO.getGpuModel()); + if (gpuResourceMapper.selectCount(resourceWrapper) > 0) { + throw new BusinessException("GPU资源已存在"); + } + GpuResource gpuResource = new GpuResource(); + BeanUtils.copyProperties(gpuResourceCreateDTO, gpuResource); + try { + gpuResourceMapper.insert(gpuResource); + } catch (Exception e) { + LogUtil.error(LogEnum.SYS_ERR, "The user: {} saved the GpuResource parameters GpuResourceCreateDTO: {} was not successful. Failure reason: {}", curUser.getUsername(), gpuResourceCreateDTO, e); + throw new BusinessException("内部错误"); + } + return Collections.singletonList(gpuResource.getId()); + } + + /** + * 修改GPU资源 + * @param gpuResourceUpdateDTO 修改GPU资源实体 + * @return List 修改GPU资源id + */ + @Override + @Transactional(rollbackFor = Exception.class) + public List update(GpuResourceUpdateDTO gpuResourceUpdateDTO) { + UserContext curUser = userContextService.getCurUser(); + GpuResource gpuResource = new GpuResource(); + gpuResource.setId(gpuResourceUpdateDTO.getId()); + //规格名称校验 + QueryWrapper resourceWrapper = new QueryWrapper<>(); + resourceWrapper.eq("gpu_type", gpuResourceUpdateDTO.getGpuType()) + .eq("gpu_model", gpuResourceUpdateDTO.getGpuModel()).ne("id", gpuResourceUpdateDTO.getId()); + if (gpuResourceMapper.selectCount(resourceWrapper) > 0) { + throw new BusinessException("GPU资源已存在"); + } + gpuResource.setGpuType(gpuResourceUpdateDTO.getGpuType()).setGpuModel(gpuResourceUpdateDTO.getGpuModel()).setK8sLabelKey(gpuResourceUpdateDTO.getK8sLabelKey()); + try { + 
gpuResourceMapper.updateById(gpuResource); + } catch (Exception e) { + LogUtil.error(LogEnum.SYS_ERR, "The user: {} updated the GpuResource parameters gpuResourceUpdateDTO: {} was not successful. Failure reason :{}", curUser.getUsername(), gpuResourceUpdateDTO, e); + throw new BusinessException("内部错误"); + } + return Collections.singletonList(gpuResource.getId()); + } + + /** + * Deletes GPU resources in batch; any mapper failure is logged and rethrown as a BusinessException. + * @param gpuResourceDeleteDTO request carrying the ids of the GPU resources to delete + */ + @Override + @Transactional(rollbackFor = Exception.class) + public void delete(GpuResourceDeleteDTO gpuResourceDeleteDTO) { + UserContext curUser = userContextService.getCurUser(); + Set idList = gpuResourceDeleteDTO.getIds(); + try { + gpuResourceMapper.deleteBatchIds(idList); + } catch (Exception e) { + LogUtil.error(LogEnum.SYS_ERR, "The user: {} deleted the GpuResource parameters gpuResourceDeleteDTO: {} was not successful. Failure reason :{}", curUser.getUsername(), gpuResourceDeleteDTO, e); + throw new BusinessException("内部错误"); + } + } + + /** + * 查询GPU类型 + * @return List GPU类型列表 + */ + @Override + public List getGpuType() { + //查询GPU类型 + QueryWrapper queryGpuModelWrapper = new QueryWrapper<>(); + queryGpuModelWrapper.orderByDesc("id"); + List gpuResources = gpuResourceMapper.selectList(queryGpuModelWrapper); + List gpuTypes = new ArrayList<>(); + if (CollectionUtils.isNotEmpty(gpuResources)) { + gpuTypes = gpuResources.stream().map(GpuResource::getGpuType).distinct().collect(Collectors.toList()); + } + return gpuTypes; + } + + /** + * 查询用户GPU类型 + * @return Set GPU类型列表 + */ + @Override + public Set getUserGpuType() { + UserContext curUser = userContextService.getCurUser(); + UserConfigVO userConfig = userService.findUserConfig(curUser.getId()); + Set userGpuTypes = new HashSet<>(); + if (CollectionUtils.isNotEmpty(userConfig.getGpuResources())) { + for (UserGpuConfigVO userGpuConfig : userConfig.getGpuResources()) { + if (userGpuConfig.getGpuLimit() > 0) { + userGpuTypes.add(userGpuConfig.getGpuType()); + } + } + } +
return userGpuTypes; + } + + /** + * Lists the GPU resources of the requested GPU type whose model the current user holds a positive GPU quota for. + * @return List GPU resources usable by the current user; empty when no model of that type has quota + */ + @Override + public List getUserGpuResource(UserGpuResourceQueryDTO userGpuResourceQueryDTO) { + UserContext curUser = userContextService.getCurUser(); + UserConfigVO userConfig = userService.findUserConfig(curUser.getId()); + Set userGpuModels = new HashSet<>(); + if (CollectionUtils.isNotEmpty(userConfig.getGpuResources())) { + for (UserGpuConfigVO userGpuConfig : userConfig.getGpuResources()) { + if (userGpuConfig.getGpuLimit() > 0 && userGpuResourceQueryDTO.getGpuType().equals(userGpuConfig.getGpuType())) { + userGpuModels.add(userGpuConfig.getGpuModel()); + } + } + } + QueryWrapper queryGpuModelWrapper = new QueryWrapper<>(); + queryGpuModelWrapper.orderByDesc("id").eq("gpu_type", userGpuResourceQueryDTO.getGpuType()).in("gpu_model", userGpuModels); + return userGpuModels.isEmpty() ? Collections.emptyList() : gpuResourceMapper.selectList(queryGpuModelWrapper); // skip the query when no model qualifies, so an empty IN list never reaches the database + } +} \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/MenuServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/MenuServiceImpl.java index 908ee3a..8c5878b 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/MenuServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/MenuServiceImpl.java @@ -15,7 +15,6 @@ */ package org.dubhe.admin.service.impl; -import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; @@ -27,7 +26,6 @@ import org.dubhe.admin.domain.dto.MenuQueryDTO; import org.dubhe.admin.domain.dto.MenuUpdateDTO; import org.dubhe.admin.domain.dto.RoleSmallDTO; import org.dubhe.admin.domain.entity.Menu; -import org.dubhe.admin.domain.vo.MenuMetaVo; import org.dubhe.admin.domain.vo.MenuVo; import org.dubhe.admin.enums.MenuTypeEnum; import org.dubhe.admin.service.MenuService; @@ -44,6 +42,7 @@ import
org.dubhe.biz.log.utils.LogUtil; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.CollectionUtils; import javax.servlet.http.HttpServletResponse; import java.io.IOException; @@ -150,13 +149,13 @@ public class MenuServiceImpl implements MenuService { .sort(resources.getSort()) .type(resources.getType()) .build(); - if(MenuTypeEnum.PAGE_TYPE.getValue().equals(resources.getType())){ + if (MenuTypeEnum.PAGE_TYPE.getValue().equals(resources.getType()) || MenuTypeEnum.DIR_TYPE.getValue().equals(resources.getType())) { menu.setBackTo(resources.getBackTo()); menu.setExtConfig(resources.getExtConfig()); } menuMapper.insert(menu); //管理员新增默认权限 - roleService.tiedRoleMenu(PermissionConstant.ADMIN_ROLE_ID,menu.getId()); + roleService.tiedRoleMenu(PermissionConstant.ADMIN_ROLE_ID, menu.getId()); return menuConvert.toDto(menu); } @@ -202,9 +201,9 @@ public class MenuServiceImpl implements MenuService { menu.setHidden(resources.getHidden()); menu.setComponentName(resources.getComponentName()); menu.setPermission(resources.getPermission()); - if(MenuTypeEnum.PAGE_TYPE.getValue().equals(resources.getType())){ + if (MenuTypeEnum.PAGE_TYPE.getValue().equals(resources.getType()) || MenuTypeEnum.DIR_TYPE.getValue().equals(resources.getType())) { ExtConfigDTO extConfigDTO = analyzeBackToValue(resources.getExtConfig()); - menu.setBackTo(Objects.isNull(extConfigDTO)?null:extConfigDTO.getBackTo()); + menu.setBackTo(Objects.isNull(extConfigDTO) ? 
null : extConfigDTO.getBackTo()); menu.setExtConfig(resources.getExtConfig()); } menuMapper.updateById(menu); @@ -215,16 +214,16 @@ public class MenuServiceImpl implements MenuService { * 解析扩展配置中 backTO 属性值 * * @param extConfig 扩展配置 - * @return ExtConfigDTO扩展配置 + * @return ExtConfigDTO扩展配置 */ - private ExtConfigDTO analyzeBackToValue(String extConfig){ + private ExtConfigDTO analyzeBackToValue(String extConfig) { ExtConfigDTO dto = ExtConfigDTO.builder().build(); try { - if(!Objects.isNull(extConfig)){ + if (!Objects.isNull(extConfig)) { dto = JSONObject.parseObject(extConfig, ExtConfigDTO.class); } - }catch (Exception e){ - LogUtil.error(LogEnum.SYS_ERR,"analyzeBackToValue error, params:{} , error:{}",JSONObject.toJSONString(extConfig),e); + } catch (Exception e) { + LogUtil.error(LogEnum.SYS_ERR, "analyzeBackToValue error, params:{} , error:{}", JSONObject.toJSONString(extConfig), e); } return dto; } @@ -358,7 +357,7 @@ public class MenuServiceImpl implements MenuService { if (menuDTO != null) { List menuDtoList = menuDTO.getChildren(); MenuVo menuVo = new MenuVo(); - menuVo.setName(ObjectUtil.isNotEmpty(menuDTO.getComponentName()) ? menuDTO.getComponentName() : menuDTO.getName()); + menuVo.setName(menuDTO.getComponentName()); // 一级目录需要加斜杠,不然会报警告 menuVo.setPath(menuDTO.getPid() == 0 ? 
"/" + menuDTO.getPath() : menuDTO.getPath()); menuVo.setHidden(menuDTO.getHidden()); @@ -370,7 +369,20 @@ public class MenuServiceImpl implements MenuService { menuVo.setComponent(menuDTO.getComponent()); } } - menuVo.setMeta(new MenuMetaVo(menuDTO.getName(), menuDTO.getIcon(), menuDTO.getLayout(), !menuDTO.getCache())); + Map metaMap = new HashMap<>(); + metaMap.put("title", menuDTO.getName()); + metaMap.put("icon", menuDTO.getIcon()); + metaMap.put("layout", menuDTO.getLayout()); + metaMap.put("noCache", !menuDTO.getCache()); + if (menuDTO.getExtConfig() != null) { + Map json = (Map) JSONObject.parse(menuDTO.getExtConfig()); + if(!CollectionUtils.isEmpty(json)){ + for (Object key : json.keySet()){ + metaMap.put(key.toString(),json.get(key)); + } + } + } + menuVo.setMeta(metaMap); if (menuDtoList != null && menuDtoList.size() != 0) { menuVo.setChildren(buildMenus(menuDtoList)); // 处理是一级菜单并且没有子菜单的情况 diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/PermissionServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/PermissionServiceImpl.java index a1bb2e0..94ae02d 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/PermissionServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/PermissionServiceImpl.java @@ -134,7 +134,7 @@ public class PermissionServiceImpl extends ServiceImpl map = new HashMap<>(2); if (trees.size() == 0) { - permissions.stream().filter(x -> !ids.contains(x.getId())).collect(Collectors.toList()); + trees = permissions.stream().filter(x -> !ids.contains(x.getId())).collect(Collectors.toList()); } Map page = new HashMap<>(3); diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/RecycleTaskServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/RecycleTaskServiceImpl.java index c6d3acd..02a35f4 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/RecycleTaskServiceImpl.java +++ 
b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/RecycleTaskServiceImpl.java @@ -329,7 +329,7 @@ public class RecycleTaskServiceImpl implements RecycleTaskService { } String emptyDir = recycleFileTmpPath + randomPath + File.separator; LogUtil.debug(LogEnum.GARBAGE_RECYCLE, "recycle task sourcePath:{},emptyDir:{}", sourcePath, emptyDir); - Process process = Runtime.getRuntime().exec(new String[]{"/bin/sh", "-c", String.format(ShellFileStoreApiImpl.DEL_COMMAND, userName, ip, emptyDir, emptyDir, sourcePath, emptyDir, sourcePath)}); + Process process = Runtime.getRuntime().exec(new String[]{"/bin/sh", "-c", String.format(ShellFileStoreApiImpl.DEL_COMMAND, emptyDir, emptyDir, sourcePath, emptyDir, sourcePath)}); return processRecycle(process); } else { LogUtil.error(LogEnum.GARBAGE_RECYCLE, "file recycle is failed! sourcePath:{}", sourcePath); @@ -460,7 +460,7 @@ public class RecycleTaskServiceImpl implements RecycleTaskService { String delRealPath = fileStoreApi.formatPath(sourcePath + File.separator + fileName + File.separator + directoryName); delRealPath = delRealPath.endsWith(File.separator) ? 
delRealPath : delRealPath + File.separator; String emptyDir = invalidFileTmpPath + directoryName + File.separator; - Process process = Runtime.getRuntime().exec(new String[]{"/bin/sh", "-c", String.format(ShellFileStoreApiImpl.DEL_COMMAND, userName, ip, emptyDir, emptyDir, delRealPath, emptyDir, delRealPath)}); + Process process = Runtime.getRuntime().exec(new String[]{"/bin/sh", "-c", String.format(ShellFileStoreApiImpl.DEL_COMMAND, emptyDir, emptyDir, delRealPath, emptyDir, delRealPath)}); Integer deleteStatus = process.waitFor(); LogUtil.info(LogEnum.GARBAGE_RECYCLE, "recycle resources path:{},recycle status:{}", delRealPath, deleteStatus); } catch (Exception e) { diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java index 4f3dc05..033e13a 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java @@ -19,21 +19,27 @@ package org.dubhe.admin.service.impl; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import org.apache.commons.collections4.CollectionUtils; +import org.dubhe.admin.client.SystemNodeClient; import org.dubhe.admin.dao.ResourceSpecsMapper; -import org.dubhe.admin.domain.dto.ResourceSpecsCreateDTO; -import org.dubhe.admin.domain.dto.ResourceSpecsDeleteDTO; -import org.dubhe.admin.domain.dto.ResourceSpecsQueryDTO; -import org.dubhe.admin.domain.dto.ResourceSpecsUpdateDTO; +import org.dubhe.admin.dao.UserGpuConfigMapper; +import org.dubhe.admin.domain.dto.*; import org.dubhe.admin.domain.entity.ResourceSpecs; +import org.dubhe.admin.domain.entity.UserGpuConfig; import org.dubhe.admin.domain.vo.ResourceSpecsQueryVO; import org.dubhe.admin.service.ResourceSpecsService; +import 
org.dubhe.admin.service.UserService; +import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.constant.StringConstant; import org.dubhe.biz.base.context.UserContext; import org.dubhe.biz.base.dto.QueryResourceSpecsDTO; +import org.dubhe.biz.base.dto.QueryUserK8sResourceDTO; import org.dubhe.biz.base.exception.BusinessException; import org.dubhe.biz.base.service.UserContextService; import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.base.vo.DataResponseBody; import org.dubhe.biz.base.vo.QueryResourceSpecsVO; +import org.dubhe.biz.base.vo.QueryUserResourceSpecsVO; +import org.dubhe.biz.base.vo.UserConfigVO; import org.dubhe.biz.db.utils.PageUtil; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; @@ -58,6 +64,15 @@ public class ResourceSpecsServiceImpl implements ResourceSpecsService { @Autowired private UserContextService userContextService; + @Autowired + private UserService userService; + + @Autowired + private SystemNodeClient systemNodeClient; + + @Autowired + private UserGpuConfigMapper userGpuConfigMapper; + /** * 查询资源规格 * @param resourceSpecsQueryDTO 查询资源规格请求实体 @@ -72,6 +87,13 @@ public class ResourceSpecsServiceImpl implements ResourceSpecsService { queryResourceSpecsWrapper.like(resourceSpecsQueryDTO.getSpecsName() != null, "specs_name", resourceSpecsQueryDTO.getSpecsName()) .eq(resourceSpecsQueryDTO.getResourcesPoolType() != null, "resources_pool_type", resourceSpecsQueryDTO.getResourcesPoolType()) .eq(resourceSpecsQueryDTO.getModule() != null, "module", resourceSpecsQueryDTO.getModule()); + if (resourceSpecsQueryDTO.getMultiGpu() != null) { + if (resourceSpecsQueryDTO.getMultiGpu()) { + queryResourceSpecsWrapper.gt("gpu_num", MagicNumConstant.ONE); + } else { + queryResourceSpecsWrapper.eq("gpu_num", MagicNumConstant.ONE); + } + } if (StringConstant.SORT_ASC.equals(resourceSpecsQueryDTO.getOrder())) { queryResourceSpecsWrapper.orderByAsc(StringUtils.humpToLine(sort)); } else { @@ -206,4 
+228,100 @@ public class ResourceSpecsServiceImpl implements ResourceSpecsService { BeanUtils.copyProperties(resourceSpecs, queryResourceSpecsVO); return queryResourceSpecsVO; } + + + /** + * 查询用户资源规格 + * @param queryUserResourceSpecsDTO 查询用户资源规格请求实体 + * @return List 用户资源规格返回结果实体类集合 + */ + @Override + public List getUserResourceSpecs(QueryUserResourceSpecsDTO queryUserResourceSpecsDTO) { + Long userId; + if (queryUserResourceSpecsDTO.getUserId() == null) { + userId = userContextService.getCurUser().getId(); + } else { + userId = queryUserResourceSpecsDTO.getUserId(); + } + UserConfigVO userConfig = userService.findUserConfig(userId); + if (queryUserResourceSpecsDTO.getResourcesPoolNode() == null) { + queryUserResourceSpecsDTO.setResourcesPoolNode(MagicNumConstant.ONE); + } + QueryWrapper queryResourceSpecsWrapper = new QueryWrapper<>(); + queryResourceSpecsWrapper.eq("module", queryUserResourceSpecsDTO.getModule()).eq("resources_pool_type", queryUserResourceSpecsDTO.getResourcesPoolType()) + .le("cpu_num", userConfig.getCpuLimit()).le("mem_num", userConfig.getMemoryLimit() * MagicNumConstant.ONE_THOUSAND_TWENTY_FOUR); + if (queryUserResourceSpecsDTO.getResourcesPoolType()) { + if (queryUserResourceSpecsDTO.getGpuModel() == null || queryUserResourceSpecsDTO.getK8sLabelKey() == null) { + throw new BusinessException("传参错误"); + } + UserGpuConfig userGpuConfig = userGpuConfigMapper.selectOne(new QueryWrapper<>(new UserGpuConfig().setUserId(userId).setGpuModel(queryUserResourceSpecsDTO.getGpuModel()) + .setK8sLabelKey(queryUserResourceSpecsDTO.getK8sLabelKey())).last(" limit 1 ")); + Integer gpuLimit = null; + if (userGpuConfig != null) { + gpuLimit = userGpuConfig.getGpuLimit(); + } + // 如果老用户未初始化GPU配置,则设置默认配置 + if (userGpuConfig == null && userGpuConfigMapper.selectCountByUserId(userId) == 0) { + UserGpuConfig preUserGpuConfig = userGpuConfigMapper.selectOne(new QueryWrapper<>(new 
UserGpuConfig().setUserId(0L).setGpuModel(queryUserResourceSpecsDTO.getGpuModel()).setK8sLabelKey(queryUserResourceSpecsDTO.getK8sLabelKey()))); + if (preUserGpuConfig != null) { + gpuLimit = preUserGpuConfig.getGpuLimit(); + } + } + if (gpuLimit != null) { + queryResourceSpecsWrapper.le("gpu_num", gpuLimit); + } + } + + if (queryUserResourceSpecsDTO.getMultiGpu() != null) { + if (queryUserResourceSpecsDTO.getMultiGpu()) { + queryResourceSpecsWrapper.gt("gpu_num", MagicNumConstant.ONE); + } else { + queryResourceSpecsWrapper.eq("gpu_num", MagicNumConstant.ONE); + } + } + + queryResourceSpecsWrapper.orderByAsc("cpu_num"); + List resourceSpecs = resourceSpecsMapper.selectList(queryResourceSpecsWrapper); + List queryUserResourceSpecsVOS = new ArrayList<>(); + List QueryUserK8sResources = new ArrayList<>(); + if (CollectionUtils.isNotEmpty(resourceSpecs)) { + QueryUserK8sResources = resourceSpecs.stream().map(x -> { + QueryUserK8sResourceDTO queryUserK8sResourceDTO = new QueryUserK8sResourceDTO(); + BeanUtils.copyProperties(x, queryUserK8sResourceDTO); + queryUserK8sResourceDTO.setUserId(userId).setResourcesPoolNode(queryUserResourceSpecsDTO.getResourcesPoolNode()); + if (queryUserResourceSpecsDTO.getResourcesPoolType()) { + queryUserK8sResourceDTO.setGpuModel(queryUserResourceSpecsDTO.getGpuModel()).setK8sLabelKey(queryUserResourceSpecsDTO.getK8sLabelKey()); + } + return queryUserK8sResourceDTO; + }).collect(Collectors.toList()); + } + //过滤k8s集群资源 + if (CollectionUtils.isNotEmpty(QueryUserK8sResources)) { + DataResponseBody> dataResponseBody = systemNodeClient.queryUserK8sResource(QueryUserK8sResources); + if (!dataResponseBody.succeed()) { + throw new BusinessException("dubhe-k8s服务调用异常,查询集群资源是否可用失败"); + } + return dataResponseBody.getData(); + } + return queryUserResourceSpecsVOS; + } + + /** + * 查询资源规格 + * @param id 资源规格id + * @return QueryResourceSpecsVO 资源规格返回结果实体类 + */ + @Override + public QueryResourceSpecsVO queryTadlResourceSpecs(Long id) { + 
LogUtil.info(LogEnum.BIZ_SYS,"Query resource specification information with resource id:{}",id); + ResourceSpecs resourceSpecs = resourceSpecsMapper.selectById(id); + LogUtil.info(LogEnum.BIZ_SYS,"Obtain resource specification information:{} ",resourceSpecs); + if (resourceSpecs == null) { + throw new BusinessException("资源规格不存在或已被删除"); + } + QueryResourceSpecsVO queryResourceSpecsVO = new QueryResourceSpecsVO(); + BeanUtils.copyProperties(resourceSpecs, queryResourceSpecsVO); + LogUtil.info(LogEnum.BIZ_SYS,"Return resource specification information :{} ",queryResourceSpecsVO); + return queryResourceSpecsVO; + } } \ No newline at end of file diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserGroupServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserGroupServiceImpl.java index 8c68435..5d2cbb2 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserGroupServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserGroupServiceImpl.java @@ -22,6 +22,7 @@ import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import org.dubhe.admin.dao.UserGroupMapper; +import org.dubhe.admin.dao.UserMapper; import org.dubhe.admin.dao.UserRoleMapper; import org.dubhe.admin.domain.dto.*; import org.dubhe.admin.domain.entity.Group; @@ -32,6 +33,7 @@ import org.dubhe.admin.service.UserGroupService; import org.dubhe.admin.service.UserService; import org.dubhe.biz.base.constant.StringConstant; import org.dubhe.biz.base.context.UserContext; +import org.dubhe.biz.base.dto.UserConfigSaveDTO; import org.dubhe.biz.base.exception.BusinessException; import org.dubhe.biz.base.service.UserContextService; import org.dubhe.biz.base.utils.ReflectionUtils; @@ -39,9 +41,12 @@ import org.dubhe.biz.base.utils.StringUtils; import 
org.dubhe.biz.db.utils.PageUtil; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.cloud.authconfig.factory.PasswordEncoderFactory; import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.dao.DuplicateKeyException; +import org.springframework.security.crypto.password.PasswordEncoder; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -55,10 +60,16 @@ import java.util.stream.Collectors; @Service public class UserGroupServiceImpl implements UserGroupService { + @Value("${initial_password}") + private String initialPassword; + @Autowired private UserGroupMapper userGroupMapper; @Autowired + private UserMapper userMapper; + + @Autowired private UserContextService userContextService; @Autowired @@ -248,7 +259,7 @@ public class UserGroupServiceImpl implements UserGroupService { * @param userGroupUpdDTO 批量删除用户组用户DTO */ @Override - public void delUser(UserGroupUpdDTO userGroupUpdDTO) { + public void delUser(UserGroupUpdDTO userGroupUpdDTO, String accessToken) { //获取用户组的成员id List userList = userGroupMapper.queryUserByGroupId(userGroupUpdDTO.getGroupId()); userGroupMapper.delUserByGroupId(userGroupUpdDTO.getGroupId()); @@ -258,7 +269,7 @@ public class UserGroupServiceImpl implements UserGroupService { ids.add(user.getId()); } } - userService.delete(ids); + userService.delete(ids, accessToken); } /** @@ -289,4 +300,45 @@ public class UserGroupServiceImpl implements UserGroupService { //添加用户的新角色 userRoleMapper.insertBatchs(userRoleList); } + + /** + * 批量重置用户组用户的密码 + * + * @param groupId + */ + @Override + @Transactional(rollbackFor = Exception.class) + public void resetUserPassword(Long groupId) { + //获取用户组的成员id + List userList = userGroupMapper.queryUserByGroupId(groupId); + Set ids = new HashSet<>(); + if 
(CollUtil.isNotEmpty(userList)) { + for (User user : userList) { + ids.add(user.getId()); + } + } + // Nothing to reset for a group without members; this also avoids building an IN condition over an empty id set + if (ids.isEmpty()) { + return; + } + PasswordEncoder passwordEncoder = PasswordEncoderFactory.getPasswordEncoder(); + // Reset to the configured initial password; only its encoded form is stored + String encode = passwordEncoder.encode(initialPassword); + + LambdaUpdateWrapper updateWrapper = new LambdaUpdateWrapper<>(); + updateWrapper.in(User::getId, ids); + updateWrapper.set(User::getPassword,encode); + updateWrapper.set(User::getLastPasswordResetTime,new Date()); + userService.update(updateWrapper); + } + + @Override + public void saveUserConfig(UserGroupConfigSaveDTO userGroupConfigSaveDTO) { + UserConfigSaveDTO userConfigSaveDTO = new UserConfigSaveDTO(); + BeanUtils.copyProperties(userGroupConfigSaveDTO,userConfigSaveDTO); + + List users = userGroupMapper.queryUserByGroupId(userGroupConfigSaveDTO.getGroupId()); + if(CollUtil.isNotEmpty(users)) { + users.forEach(user -> { + userConfigSaveDTO.setUserId(user.getId()); + userService.saveUserConfig(userConfigSaveDTO,null); + }); + } + } } diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserResourceServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserResourceServiceImpl.java new file mode 100644 index 0000000..71a5375 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserResourceServiceImpl.java @@ -0,0 +1,314 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.service.impl; + +import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.util.StrUtil; +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.google.common.collect.Maps; +import org.dubhe.admin.client.ResourceNamespaceClient; +import org.dubhe.admin.dao.UserConfigMapper; +import org.dubhe.admin.dao.UserGpuConfigMapper; +import org.dubhe.admin.dao.UserMapper; +import org.dubhe.admin.domain.dto.UserResourceListDTO; +import org.dubhe.admin.domain.dto.UserResourceQueryDTO; +import org.dubhe.admin.domain.entity.UserConfig; +import org.dubhe.admin.domain.entity.UserGpuConfig; +import org.dubhe.admin.domain.vo.UserLimitConfigVO; +import org.dubhe.admin.domain.vo.UserResourceResVO; +import org.dubhe.admin.enums.ResourceTypeEnum; +import org.dubhe.admin.enums.StatTypeEnum; +import org.dubhe.admin.service.UserResourceService; +import org.dubhe.biz.base.constant.NumberConstant; +import org.dubhe.biz.base.constant.StringConstant; +import org.dubhe.biz.base.constant.SymbolConstant; +import org.dubhe.biz.base.constant.UserConstant; +import org.dubhe.biz.base.exception.BusinessException; +import org.dubhe.biz.base.utils.MathUtils; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.GpuAllotVO; +import org.dubhe.biz.base.vo.UserAllotVO; +import org.dubhe.biz.db.utils.PageUtil; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * @description 用户资源统计实现层 + * @date 2021-11-23 + */ +@Service +public class UserResourceServiceImpl implements 
UserResourceService { + + @Autowired + private UserConfigMapper userConfigMapper; + + @Autowired + private UserGpuConfigMapper userGpuConfigMapper; + + @Autowired + private ResourceNamespaceClient resourceNamespaceClient; + + @Autowired + private UserMapper userMapper; + + private Map> gpuMap = Maps.newHashMap(); + private Map> cpuMap = Maps.newHashMap(); + private Map> memMap = Maps.newHashMap(); + private Map gpuAllotMap = Maps.newHashMap(); + + private final static List SUMDAYS = new ArrayList(); + + static { + SUMDAYS.add(UserConstant.UNIT_7D); + SUMDAYS.add(UserConstant.UNIT_15D); + } + + /** + * 用户资源Top10统计 + * + * @param resourceQueryDTO 请求DTO实体 + * @return List 用户Top10资源列表 + */ + @Override + public List getResourceTotal(UserResourceQueryDTO resourceQueryDTO) { + List userAllotList; + //获取资源配额 + if (resourceQueryDTO.getStatType().equals(StatTypeEnum.ALLOT_TYPE.getCode())) { + userAllotList = toResourceAllot(resourceQueryDTO); + } else { + userAllotList = toResourceUsage(resourceQueryDTO); + } + return userAllotList; + } + + /** + * 用户资源统计 + * + * @return List 用户资源统计列表VO + */ + @Override + public Map getResourceList(UserResourceListDTO resourceListDTO) { + List userResourceResList = new ArrayList<>(); + Page page = resourceListDTO.toPage(); + String sort = StrUtil.isEmpty(resourceListDTO.getSort()) ? ResourceTypeEnum.GPU_TYPE.getDesc() : resourceListDTO.getSort(); + String order = StrUtil.isEmpty(resourceListDTO.getOrder()) ? 
StringConstant.SORT_DESC : resourceListDTO.getOrder(); + + List userLimitConfigs = userConfigMapper.selectLimitSum(page, sort, order); + List userIds = userLimitConfigs.stream().map(UserLimitConfigVO::getUserId).collect(Collectors.toList()); + String namespaces = userIds.stream().map(id -> UserConstant.NAMESPACE_PREFIX + id).collect(Collectors.joining("|")); + // Load the per-model GPU quota of the users on this page + toGpuAllotMap(userIds); + // Load peak resource usage for every statistics period and resource type + SUMDAYS.stream().forEach(day -> { + EnumSet.allOf(ResourceTypeEnum.class).forEach(code -> { + toResourceUsageMap(code.getCode(), day, namespaces); + }); + }); + + + for (UserLimitConfigVO userLimitConfig : userLimitConfigs) { + UserResourceResVO userResourceRes = new UserResourceResVO(); + userResourceRes.setId(userLimitConfig.getUserId()) + .setUserName(userLimitConfig.getUserName()) + .setNickName(userLimitConfig.getNickName()) + .setGpu(userLimitConfig.getGpu()) + .setCpu(userLimitConfig.getCpu()) + .setMem(userLimitConfig.getMem()); + // Use the per-model GPU quota recorded for this user; fall back to an empty list when there is none + UserAllotVO userGpuAllot = gpuAllotMap.get(userLimitConfig.getUserId()); + if (userGpuAllot == null || userGpuAllot.getGpuAllotList() == null) { + userResourceRes.setGpuModelAllots(new ArrayList<>()); + } else { + userResourceRes.setGpuModelAllots(userGpuAllot.getGpuAllotList()); + } + userResourceRes.setGpu7unit(gpuMap.get(UserConstant.UNIT_7D).getOrDefault(userLimitConfig.getUserId(), SymbolConstant.ZERO)); + userResourceRes.setGpu15unit(gpuMap.get(UserConstant.UNIT_15D).getOrDefault(userLimitConfig.getUserId(), SymbolConstant.ZERO)); + userResourceRes.setGpu7(MathUtils.floatDivision(userResourceRes.getGpu7unit(), userResourceRes.getGpu(), NumberConstant.NUMBER_2).toString()); + userResourceRes.setGpu15(MathUtils.floatDivision(userResourceRes.getGpu15unit(), userResourceRes.getGpu(), NumberConstant.NUMBER_2).toString()); + + userResourceRes.setCpu7unit(MathUtils.floatDivision(cpuMap.get(UserConstant.UNIT_7D).getOrDefault(userLimitConfig.getUserId(), SymbolConstant.ZERO), SymbolConstant.ONE, NumberConstant.NUMBER_2).toString()); +
userResourceRes.setCpu15unit(MathUtils.floatDivision(cpuMap.get(UserConstant.UNIT_15D).getOrDefault(userLimitConfig.getUserId(), SymbolConstant.ZERO), SymbolConstant.ONE, NumberConstant.NUMBER_2).toString()); + userResourceRes.setCpu7(MathUtils.floatDivision(userResourceRes.getCpu7unit(), userResourceRes.getCpu(), NumberConstant.NUMBER_2).toString()); + userResourceRes.setCpu15(MathUtils.floatDivision(userResourceRes.getCpu15unit(), userResourceRes.getCpu(), NumberConstant.NUMBER_2).toString()); + + userResourceRes.setMem7unit(MathUtils.floatDivision(memMap.get(UserConstant.UNIT_7D).getOrDefault(userLimitConfig.getUserId(), SymbolConstant.ZERO), StringConstant.MEM_UNIT, NumberConstant.NUMBER_2).toString()); + userResourceRes.setMem15unit(MathUtils.floatDivision(memMap.get(UserConstant.UNIT_15D).getOrDefault(userLimitConfig.getUserId(), SymbolConstant.ZERO), StringConstant.MEM_UNIT, NumberConstant.NUMBER_2).toString()); + userResourceRes.setMem7(MathUtils.floatDivision(userResourceRes.getMem7unit(), userResourceRes.getMem(), NumberConstant.NUMBER_2).toString()); + userResourceRes.setMem15(MathUtils.floatDivision(userResourceRes.getMem15unit(), userResourceRes.getMem(), NumberConstant.NUMBER_2).toString()); + + userResourceResList.add(userResourceRes); + } + return PageUtil.toPage(page, userResourceResList); + } + + /** + * 根据namespace-userId批量查询资源用量峰值 + * + * @param resourceType 资源类型 + * @param sumDay 查询周期 + * @param namespaces 拼接的namespace字符串,例:namespace-1 | namespace-2 | namespace-3 + */ + private void toResourceUsageMap(Integer resourceType, String sumDay, String namespaces) { + DataResponseBody> result = resourceNamespaceClient.getResourceUsageByUser(resourceType, sumDay, namespaces); + if (!result.succeed()) { + throw new BusinessException("查询某用户用量峰值远程调用失败"); + } + if (resourceType.equals(ResourceTypeEnum.GPU_TYPE.getCode())) { + gpuMap.put(sumDay, result.getData()); + } else if (resourceType.equals(ResourceTypeEnum.CPU_TYPE.getCode())) { + cpuMap.put(sumDay, 
result.getData()); + } else if (resourceType.equals(ResourceTypeEnum.MEMORY_TYPE.getCode())) { + memMap.put(sumDay, result.getData()); + } + } + + /** + * Collects the GPU quota of each user, broken down by GPU model, into gpuAllotMap + * + * @param userIds ids of the users to collect quotas for; nothing is queried when it is empty + */ + private void toGpuAllotMap(List userIds) { + // An empty id list would produce an IN condition without values, so skip the query + if (CollUtil.isEmpty(userIds)) { + return; + } + // Fetch the GPU quota rows (one per GPU model) of the given users in a single query + List userGpuConfigs = userGpuConfigMapper.selectList(new LambdaQueryWrapper().in(UserGpuConfig::getUserId, userIds)); + for (Long userId : userIds) { + UserAllotVO userAllotVO = new UserAllotVO(); + List gpuAllots = new ArrayList<>(); + for (UserGpuConfig gpuConfig : userGpuConfigs) { + if (userId.equals(gpuConfig.getUserId())) { + GpuAllotVO gpuAllotVO = new GpuAllotVO(); + gpuAllotVO.setGpuModel(gpuConfig.getGpuModel()); + gpuAllotVO.setAllotTotal(gpuConfig.getGpuLimit()); + gpuAllots.add(gpuAllotVO); + userAllotVO.setAllotTotal(MathUtils.add(userAllotVO.getAllotTotal(), gpuConfig.getGpuLimit().toString())); + userAllotVO.setUserId(gpuConfig.getUserId()); + userAllotVO.setGpuAllotList(gpuAllots); + } + } + gpuAllotMap.put(userId, userAllotVO); + } + } + + /** + * Queries per-user peak resource usage through the remote resource-namespace client + * + * @param resourceQueryDTO query DTO carrying the resource type, statistics period and statistics type + * @return List per-user usage entries + */ + private List toResourceUsage(UserResourceQueryDTO resourceQueryDTO) { + List userAllotList = new ArrayList<>(); + DataResponseBody> result = resourceNamespaceClient.getResourceNamespace(resourceQueryDTO.getResourceType(), resourceQueryDTO.getSumDay()); + if (!result.succeed()) { + throw new BusinessException("查询用户用量峰值远程调用失败"); + } + if (CollUtil.isNotEmpty(result.getData())) { + userAllotList = result.getData().stream().map(userAllotVO -> { + Long userId = Long.valueOf(userAllotVO.getUserName().replaceAll(UserConstant.NAMESPACE_PREFIX, StrUtil.EMPTY)); + userAllotVO.setUserName(userMapper.findUserNameById(userId)); + userAllotVO.setUserId(userId); + if (!resourceQueryDTO.getResourceType().equals(ResourceTypeEnum.GPU_TYPE.getCode())) { + userAllotVO.setAllotTotal(MathUtils.floatDivision(userAllotVO.getAllotTotal(), StringConstant.MEM_UNIT,
2).toString()); + } else { + userAllotVO.setAllotTotal(userAllotVO.getAllotTotal()); + } + + return userAllotVO; + }).collect(Collectors.toList()); + } + if (resourceQueryDTO.getStatType().equals(StatTypeEnum.USAGE_RATE_TYPE.getCode())) { + userAllotList = toUserAllotById(resourceQueryDTO.getResourceType(), userAllotList); + } + return userAllotList; + } + + /** + * TOP10资源统计配额 + * + * @param resourceQueryDTO 用户资源统计DTO + * @return List 用户Top10资源列表 + */ + private List toResourceAllot(UserResourceQueryDTO resourceQueryDTO) { + List userAllotList = new ArrayList<>(); + switch (resourceQueryDTO.getResourceType()) { + case 2: + userAllotList = userConfigMapper.selectCpuAllotTotal(); + break; + case 3: + userAllotList = userConfigMapper.selectMemoryAllotTotal(); + break; + case 1: + List gpuConfigList = userGpuConfigMapper.selectAllotTotal(); + for (UserGpuConfig gpuConfig : gpuConfigList) { + UserAllotVO userAllotVO = new UserAllotVO(); + userAllotVO.setUserName(gpuConfig.getUserName()); + userAllotVO.setAllotTotal(gpuConfig.getGpuLimit().toString()); + userAllotVO.setGpuAllotList(userGpuConfigMapper.selectGpuModelTotal(gpuConfig.getUserId())); + userAllotList.add(userAllotVO); + } + default: + break; + } + return userAllotList; + } + + /** + * 用户资源配额统计 + * + * @param resourceType 资源类型 + * @param userAllotList 用户Top10资源列表 + * @return List 用户Top10资源列表 + */ + private List toUserAllotById(Integer resourceType, List userAllotList) { + switch (resourceType) { + //GPU配额总量 + case 1: + userAllotList = userAllotList.stream().map(userAllotVO -> { + int gpuSum = userGpuConfigMapper.selectGpuLimitSum(userAllotVO.getUserId()); + userAllotVO.setAllotTotal(MathUtils.floatDivision(userAllotVO.getAllotTotal(), String.valueOf(gpuSum), NumberConstant.NUMBER_2).toString()); + return userAllotVO; + }).collect(Collectors.toList()); + break; + //CPU配额总量 + case 2: + userAllotList = userAllotList.stream().map(userAllotVO -> { + UserConfig userConfig = 
userConfigMapper.selectLimitSumByUser(userAllotVO.getUserId()); + if (userConfig != null) { + userAllotVO.setAllotTotal(MathUtils.floatDivision(userAllotVO.getAllotTotal(), String.valueOf(userConfig.getCpuLimit()), 2).toString()); + } + return userAllotVO; + }).collect(Collectors.toList()); + break; + //内存配额总量 + case 3: + userAllotList = userAllotList.stream().map(userAllotVO -> { + UserConfig userConfig = userConfigMapper.selectLimitSumByUser(userAllotVO.getUserId()); + if (userConfig != null) { + userAllotVO.setAllotTotal(MathUtils.floatDivision(userAllotVO.getAllotTotal(), String.valueOf(userConfig.getMemoryLimit()), NumberConstant.NUMBER_2).toString()); + } + return userAllotVO; + }).collect(Collectors.toList()); + break; + default: + break; + } + return userAllotList; + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java index babb286..bbd9716 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java @@ -16,6 +16,7 @@ */ package org.dubhe.admin.service.impl; +import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.StrUtil; import cn.hutool.crypto.asymmetric.KeyType; @@ -26,18 +27,39 @@ import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import org.dubhe.admin.async.CleanupUserResourcesAsync; import org.dubhe.admin.client.AuthServiceClient; +import org.dubhe.admin.client.GpuConfigClient; import org.dubhe.admin.client.ResourceQuotaClient; -import org.dubhe.admin.dao.*; -import org.dubhe.admin.domain.dto.*; +import 
org.dubhe.admin.client.template.GpuConfigTemplateClient; +import org.dubhe.admin.client.template.ObtainAccessToken; +import org.dubhe.admin.client.template.ResourceQuotaTemplateClient; +import org.dubhe.admin.dao.MenuMapper; +import org.dubhe.admin.dao.PermissionMapper; +import org.dubhe.admin.dao.RoleMapper; +import org.dubhe.admin.dao.TeamMapper; +import org.dubhe.admin.dao.UserAvatarMapper; +import org.dubhe.admin.dao.UserConfigMapper; +import org.dubhe.admin.dao.UserGpuConfigMapper; +import org.dubhe.admin.dao.UserMapper; +import org.dubhe.admin.dao.UserRoleMapper; +import org.dubhe.admin.domain.dto.AuthUserDTO; +import org.dubhe.admin.domain.dto.EmailDTO; +import org.dubhe.admin.domain.dto.UserCenterUpdateDTO; +import org.dubhe.admin.domain.dto.UserCreateDTO; +import org.dubhe.admin.domain.dto.UserEmailUpdateDTO; +import org.dubhe.admin.domain.dto.UserQueryDTO; +import org.dubhe.admin.domain.dto.UserRegisterDTO; +import org.dubhe.admin.domain.dto.UserRegisterMailDTO; +import org.dubhe.admin.domain.dto.UserResetPasswordDTO; +import org.dubhe.admin.domain.dto.UserUpdateDTO; import org.dubhe.admin.domain.entity.Role; import org.dubhe.admin.domain.entity.User; import org.dubhe.admin.domain.entity.UserAvatar; import org.dubhe.admin.domain.entity.UserConfig; +import org.dubhe.admin.domain.entity.UserGpuConfig; import org.dubhe.admin.domain.entity.UserRole; import org.dubhe.admin.domain.vo.EmailVo; -import org.dubhe.admin.domain.vo.UserConfigCreateVO; -import org.dubhe.admin.domain.vo.UserConfigVO; import org.dubhe.admin.domain.vo.UserVO; import org.dubhe.admin.enums.UserMailCodeEnum; import org.dubhe.admin.event.EmailEventPublisher; @@ -48,16 +70,29 @@ import org.dubhe.biz.base.constant.AuthConst; import org.dubhe.biz.base.constant.ResponseCode; import org.dubhe.biz.base.constant.UserConstant; import org.dubhe.biz.base.context.UserContext; -import org.dubhe.biz.base.dto.*; +import org.dubhe.biz.base.dto.GpuConfigDTO; +import org.dubhe.biz.base.dto.Oauth2TokenDTO; 
+import org.dubhe.biz.base.dto.ResourceQuotaDTO; +import org.dubhe.biz.base.dto.SysPermissionDTO; +import org.dubhe.biz.base.dto.SysRoleDTO; +import org.dubhe.biz.base.dto.SysUserConfigDTO; +import org.dubhe.biz.base.dto.SysUserGpuConfigDTO; +import org.dubhe.biz.base.dto.TeamDTO; +import org.dubhe.biz.base.dto.UserConfigSaveDTO; +import org.dubhe.biz.base.dto.UserDTO; +import org.dubhe.biz.base.dto.UserGpuConfigDTO; import org.dubhe.biz.base.enums.BaseErrorCodeEnum; import org.dubhe.biz.base.enums.SwitchEnum; import org.dubhe.biz.base.exception.BusinessException; import org.dubhe.biz.base.exception.CaptchaException; -import org.dubhe.biz.base.utils.DateUtil; import org.dubhe.biz.base.utils.Md5Util; import org.dubhe.biz.base.utils.RandomUtil; import org.dubhe.biz.base.utils.RsaEncrypt; import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.GpuAllotVO; +import org.dubhe.biz.base.vo.UserAllotResourceVO; +import org.dubhe.biz.base.vo.UserConfigVO; +import org.dubhe.biz.base.vo.UserGpuConfigVO; import org.dubhe.biz.dataresponse.factory.DataResponseFactory; import org.dubhe.biz.db.utils.PageUtil; import org.dubhe.biz.db.utils.WrapperHelp; @@ -65,6 +100,7 @@ import org.dubhe.biz.file.utils.DubheFileUtil; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.biz.permission.annotation.DataPermissionMethod; +import org.dubhe.biz.permission.aspect.PermissionAspect; import org.dubhe.biz.redis.utils.RedisUtils; import org.dubhe.cloud.authconfig.dto.JwtUserDTO; import org.dubhe.cloud.authconfig.factory.PasswordEncoderFactory; @@ -73,15 +109,28 @@ import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.cglib.beans.BeanMap; +import org.springframework.cloud.context.config.annotation.RefreshScope; import org.springframework.security.crypto.password.PasswordEncoder; import 
org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import org.springframework.transaction.support.TransactionSynchronizationAdapter; +import org.springframework.transaction.support.TransactionSynchronizationManager; import org.springframework.util.CollectionUtils; import javax.annotation.Resource; import javax.servlet.http.HttpServletResponse; import java.io.IOException; -import java.util.*; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; /** @@ -89,6 +138,7 @@ import java.util.stream.Collectors; * @date 2020-11-26 */ @Service +@RefreshScope public class UserServiceImpl extends ServiceImpl implements UserService { @Value("${rsa.private_key}") @@ -98,7 +148,7 @@ public class UserServiceImpl extends ServiceImpl implements Us private String initialPassword; @Value("${user.config.notebook-delay-delete-time}") - private Integer defaultNotebookDelayDeleteTime; + private Integer userConfigNotebookDelayDeleteTime; @Value("${user.config.cpu-limit}") private Integer cpuLimit; @@ -106,8 +156,17 @@ public class UserServiceImpl extends ServiceImpl implements Us @Value("${user.config.memory-limit}") private Integer memoryLimit; - @Value("${user.config.gpu-limit}") - private Integer gpuLimit; + @Value("${user.config.gpu-limit.gpu-type}") + private String gpuType; + + @Value("${user.config.gpu-limit.gpu-model}") + private String gpuModel; + + @Value("${user.config.gpu-limit.k8s-label-key}") + private String k8sLabelKey; + + @Value("${user.config.gpu-limit.gpu-num-limit}") + private Integer gpuNumLimit; @Autowired private UserMapper userMapper; @@ -131,7 +190,6 @@ public class UserServiceImpl extends ServiceImpl implements Us @Autowired private 
UserAvatarMapper userAvatarMapper; - @Autowired private RedisUtils redisUtils; @@ -151,15 +209,34 @@ public class UserServiceImpl extends ServiceImpl implements Us private UserConfigMapper userConfigMapper; @Autowired + private UserGpuConfigMapper userGpuConfigMapper; + + @Autowired ResourceQuotaClient resourceQuotaClient; + @Autowired + ResourceQuotaTemplateClient resourceQuotaTemplateClient; + + @Autowired + GpuConfigTemplateClient gpuConfigTemplateClient; + + @Autowired + private GpuConfigClient gpuConfigClient; + + @Autowired + private CleanupUserResourcesAsync cleanupUserResourcesAsync; + @Autowired + private ObtainAccessToken obtainAccessToken; /** * 测试标识 true:允许debug false:拒绝debug */ @Value("${debug.flag}") private Boolean debugFlag; + @Value("${email.send-limit}") + private Integer emailSendLimit; + private final String LOCK_SEND_CODE = "LOCK_SEND_CODE"; /** @@ -173,10 +250,12 @@ public class UserServiceImpl extends ServiceImpl implements Us public Object queryAll(UserQueryDTO criteria, Page page) { if (criteria.getRoleId() == null) { IPage users = userMapper.selectCollPage(page, WrapperHelp.getWrapper(criteria)); - return PageUtil.toPage(users, userConvert::toDto); + List userDTOList = convertToUserDTO(users); + return PageUtil.toPage(users, userDTOList); } else { IPage users = userMapper.selectCollPageByRoleId(page, WrapperHelp.getWrapper(criteria), criteria.getRoleId()); - return PageUtil.toPage(users, userConvert::toDto); + List userDTOList = convertToUserDTO(users); + return PageUtil.toPage(users, userDTOList); } } @@ -189,7 +268,17 @@ public class UserServiceImpl extends ServiceImpl implements Us @Override public List queryAll(UserQueryDTO criteria) { List users = userMapper.selectCollList(WrapperHelp.getWrapper(criteria)); - return userConvert.toDto(users); + List userDTOList = new ArrayList<>(); + if (CollectionUtil.isEmpty(users)) { + return userDTOList; + } + userDTOList = userConvert.toDto(users); + for (UserDTO userDTO : userDTOList) { + String userGroupName
= userMapper.queryUserGroupNameByUserId(userDTO.getId()); + userDTO.setUserGroupName(userGroupName); + } + + return userDTOList; } /** @@ -248,15 +337,14 @@ public class UserServiceImpl extends ServiceImpl implements Us for (Role role : resources.getRoles()) { roleMapper.tiedUserRole(user.getId(), role.getId()); } - UserConfigDTO userConfigDTO = new UserConfigDTO(); - userConfigDTO.setUserId(user.getId()); - userConfigDTO.setCpuLimit(cpuLimit); - userConfigDTO.setMemoryLimit(memoryLimit); - userConfigDTO.setGpuLimit(gpuLimit); - DataResponseBody dataResponseBody = resourceQuotaClient.updateResourceQuota(userConfigDTO); - if (!dataResponseBody.succeed()){ - throw new BusinessException("用户配置更新失败"); - } + //初始化用户配置 + UserConfigSaveDTO userConfigDTO = new UserConfigSaveDTO(); + userConfigDTO.setUserId(user.getId()).setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) + .setNotebookDelayDeleteTime(userConfigNotebookDelayDeleteTime); + List userGpuConfigs = new ArrayList<>(); + userGpuConfigs.add(new UserGpuConfigDTO().setGpuType(gpuType).setGpuModel(gpuModel).setK8sLabelKey(k8sLabelKey).setGpuLimit(gpuNumLimit)); + userConfigDTO.setGpuResources(userGpuConfigs); + saveUserConfig(userConfigDTO, null); return userConvert.toDto(user); } @@ -307,19 +395,15 @@ public class UserServiceImpl extends ServiceImpl implements Us */ @Override @Transactional(rollbackFor = Exception.class) - public void delete(Set ids) { + public void delete(Set ids, String accessToken) { if (!CollectionUtils.isEmpty(ids)) { Long adminId = Long.valueOf(UserConstant.ADMIN_USER_ID); if (ids.contains(adminId)) { throw new BusinessException(BaseErrorCodeEnum.SYSTEM_USER_CANNOT_DELETE); } - ids.forEach(id -> { - userMapper.updateById( - User.builder() - .id(id) - .deleted(SwitchEnum.getBooleanValue(SwitchEnum.ON.getValue())) - .build()); - }); + userMapper.deleteBatchIds(ids); + //异步清理用户资源 + cleanupUserResourcesAsync.cleanUserResource(ids, accessToken); } } @@ -339,6 +423,9 @@ public class UserServiceImpl 
extends ServiceImpl implements Us } UserDTO dto = new UserDTO(); BeanUtils.copyProperties(user, dto); + if (user.getUserAvatar() != null && StrUtil.isNotBlank(user.getUserAvatar().getPath())) { + dto.setUserAvatarPath(user.getUserAvatar().getPath()); + } List roles = roleMapper.findRolesByUserId(user.getId()); if (!CollectionUtils.isEmpty(roles)) { dto.setRoles(roles.stream().map(a -> { @@ -357,12 +444,36 @@ public class UserServiceImpl extends ServiceImpl implements Us private SysUserConfigDTO getUserConfig(Long userId) { UserConfig userConfig = userConfigMapper.selectOne(new QueryWrapper<>(new UserConfig().setUserId(userId))); - SysUserConfigDTO sysUserConfigDTO= new SysUserConfigDTO(); - if (userConfig == null){ - return sysUserConfigDTO.setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) - .setGpuLimit(gpuLimit).setNotebookDelayDeleteTime(defaultNotebookDelayDeleteTime); + SysUserConfigDTO sysUserConfigDTO = new SysUserConfigDTO(); + // 如果用户配置为空,则返回默认配置 + if (userConfig == null) { + sysUserConfigDTO.setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) + .setNotebookDelayDeleteTime(userConfigNotebookDelayDeleteTime); + } else { + BeanUtils.copyProperties(userConfig, sysUserConfigDTO); + } + // 查询用户GPU配置 + List userGpuConfigs = userGpuConfigMapper.selectList(new QueryWrapper<>(new UserGpuConfig().setUserId(userId))); + // 如果老用户未初始化GPU配置,则返回默认配置 + if (CollectionUtils.isEmpty(userGpuConfigs) && userGpuConfigMapper.selectCountByUserId(userId) == 0) { + List preUserGpuConfigs = userGpuConfigMapper.selectList(new QueryWrapper<>(new UserGpuConfig().setUserId(0L))); + if (CollectionUtil.isNotEmpty(preUserGpuConfigs)) { + userGpuConfigs.addAll(preUserGpuConfigs); + } + } + List sysUserGpuConfigDTOs = userGpuConfigs.stream().map(x -> { + SysUserGpuConfigDTO sysUserGpuConfigDTO = new SysUserGpuConfigDTO(); + BeanUtils.copyProperties(x, sysUserGpuConfigDTO); + return sysUserGpuConfigDTO; + }).collect(Collectors.toList()); + sysUserConfigDTO.setGpuResources(sysUserGpuConfigDTOs); 
+ //如果当前用户如果没有默认镜像,就使用管理员的 + if (userConfig == null || userConfig.getDefaultImageId() == null) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); + queryWrapper.eq(UserConfig::getUserId, PermissionAspect.PUBLIC_DATA_USER_ID); + UserConfig adminConfig = userConfigMapper.selectOne(queryWrapper); + sysUserConfigDTO.setDefaultImageId(adminConfig.getDefaultImageId()); } - BeanUtils.copyProperties(userConfig, sysUserConfigDTO); return sysUserConfigDTO; } @@ -491,6 +602,7 @@ public class UserServiceImpl extends ServiceImpl implements Us //用户信息校验 checkoutUserInfo(userRegisterDTO); String encode = passwordEncoder.encode(RsaEncrypt.decrypt(userRegisterDTO.getPassword(), privateKey)); + Long userId; try { User newUser = User.builder() .email(userRegisterDTO.getEmail()) @@ -503,18 +615,42 @@ public class UserServiceImpl extends ServiceImpl implements Us //新增用户注册信息 userMapper.insert(newUser); - + userId = newUser.getId(); //绑定用户默认权限 - userRoleMapper.insert(UserRole.builder().roleId((long) UserConstant.REGISTER_ROLE_ID).userId(newUser.getId()).build()); - + userRoleMapper.insert(UserRole.builder().roleId((long) UserConstant.REGISTER_ROLE_ID).userId(userId).build()); } catch (Exception e) { LogUtil.error(LogEnum.SYS_ERR, "UserServiceImpl userRegister error , param:{} error:{}", JSONObject.toJSONString(userRegisterDTO), e); throw new BusinessException(BaseErrorCodeEnum.ERROR_SYSTEM.getCode(), BaseErrorCodeEnum.ERROR_SYSTEM.getMsg()); } + //初始化用户配置 + execute(userId, userRegisterDTO.getUsername(), userRegisterDTO.getPassword()); return new DataResponseBody(); } + /** + * 同步初始化用户配置 + * @param userId 用户id + * @param username 用户名 + * @param password 用户密码 + */ + public void execute(Long userId, String username, String password) { + TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronizationAdapter() { + @Override + public void afterCommit() { + //为注册用户生成token + String token = obtainAccessToken.generateToken(username, password); + //初始化用户配置 + 
UserConfigSaveDTO userConfigDTO = new UserConfigSaveDTO(); + userConfigDTO.setUserId(userId).setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) + .setNotebookDelayDeleteTime(userConfigNotebookDelayDeleteTime); + List userGpuConfigs = new ArrayList<>(); + userGpuConfigs.add(new UserGpuConfigDTO().setGpuType(gpuType).setGpuModel(gpuModel).setK8sLabelKey(k8sLabelKey).setGpuLimit(gpuNumLimit)); + userConfigDTO.setGpuResources(userGpuConfigs); + saveUserConfig(userConfigDTO, token); + } + }); + } /** * 获取code通过发送邮件 @@ -565,6 +701,7 @@ public class UserServiceImpl extends ServiceImpl implements Us } catch (Exception e) { redisUtils.hdel(UserConstant.USER_EMAIL_REGISTER.concat(email), email); + redisUtils.hdel(UserConstant.USER_EMAIL_LIMIT_COUNT.concat(email), email); LogUtil.error(LogEnum.SYS_ERR, "UserServiceImpl getCodeBySentEmail error , param:{} error:{}", email, e); throw new BusinessException(BaseErrorCodeEnum.ERROR_SYSTEM.getCode(), BaseErrorCodeEnum.ERROR_SYSTEM.getMsg()); } @@ -623,7 +760,7 @@ public class UserServiceImpl extends ServiceImpl implements Us .email(curUser.getUser().getEmail()) .password(Md5Util.createMd5(Md5Util.createMd5(curUser.getUsername()).concat(initialPassword))) .username(curUser.getUsername()) - .is_staff(!CollectionUtils.isEmpty(userRoles) ? 
true : false).build(); + .is_staff(!CollectionUtils.isEmpty(userRoles)).build(); return BeanMap.create(vo); } @@ -752,43 +889,112 @@ public class UserServiceImpl extends ServiceImpl implements Us // 查询用户配置 UserConfig userConfig = userConfigMapper.selectOne(new QueryWrapper<>(new UserConfig().setUserId(userId))); UserConfigVO userConfigVO = new UserConfigVO(); - // 如果用户配置为空,则返回 - if (userConfig == null){ - return userConfigVO.setUserId(userId).setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) - .setGpuLimit(gpuLimit).setNotebookDelayDeleteTime(defaultNotebookDelayDeleteTime); + // 如果用户配置为空,则返回默认配置 + if (userConfig == null) { + userConfigVO.setUserId(userId).setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) + .setNotebookDelayDeleteTime(userConfigNotebookDelayDeleteTime); + } else { + BeanUtils.copyProperties(userConfig, userConfigVO); } - // 封装用户配置 VO - BeanUtils.copyProperties(userConfig, userConfigVO); + // 查询用户GPU配置 + List userGpuConfigs = userGpuConfigMapper.selectList(new QueryWrapper<>(new UserGpuConfig().setUserId(userId))); + List userGpuConfigVOList = new ArrayList<>(); + // 如果老用户未初始化GPU配置,则返回默认配置 + if (CollectionUtils.isEmpty(userGpuConfigs) && userGpuConfigMapper.selectCountByUserId(userId) == 0) { + List preUserGpuConfigs = userGpuConfigMapper.selectList(new QueryWrapper<>(new UserGpuConfig().setUserId(PermissionAspect.PUBLIC_DATA_USER_ID))); + userGpuConfigs = preUserGpuConfigs; + } + userGpuConfigs.forEach(userGpuConfig -> { + UserGpuConfigVO userGpuConfigVO = new UserGpuConfigVO(); + BeanUtils.copyProperties(userGpuConfig, userGpuConfigVO); + userGpuConfigVOList.add(userGpuConfigVO); + }); + + userConfigVO.setGpuResources(userGpuConfigVOList); return userConfigVO; } /** * 创建或更新用户配置 * - * @param userConfigDTO 用户配置 - * @return org.dubhe.admin.domain.vo.UserConfigCreateVO 用户配置 VO + * @param userConfigSaveDTO 用户配置 */ @Override @Transactional(rollbackFor = Exception.class) - public UserConfigCreateVO createOrUpdateUserConfig(UserConfigDTO userConfigDTO) { 
- DataResponseBody dataResponseBody = resourceQuotaClient.updateResourceQuota(userConfigDTO); - if (!dataResponseBody.succeed()){ - throw new BusinessException("用户配置更新失败"); + public void saveUserConfig(UserConfigSaveDTO userConfigSaveDTO, String token) { + //设置k8s quota (k8s quota设置只支持以厂商为单位的配置) + ResourceQuotaDTO resourceQuotaDTO = new ResourceQuotaDTO(); + BeanUtils.copyProperties(userConfigSaveDTO, resourceQuotaDTO); + //目前只有nvidia和suiyuan,map初始化空间设置为2 + Map map = new HashMap<>(2); + if (!CollectionUtils.isEmpty(userConfigSaveDTO.getGpuResources())) { + for (UserGpuConfigDTO userGpuConfigDTO : userConfigSaveDTO.getGpuResources()) { + Integer gpuNumLimit = userGpuConfigDTO.getGpuLimit(); + if (map.containsKey(userGpuConfigDTO.getK8sLabelKey())) { + gpuNumLimit = map.get(userGpuConfigDTO.getK8sLabelKey()) + userGpuConfigDTO.getGpuLimit(); + } + map.put(userGpuConfigDTO.getK8sLabelKey(), gpuNumLimit); + } + } + resourceQuotaDTO.setGpuLimit(map); + + //设置k8s GPU型号配置 + GpuConfigDTO gpuConfigDTO = new GpuConfigDTO(); + gpuConfigDTO.setUserId(userConfigSaveDTO.getUserId()); + if (!CollectionUtils.isEmpty(userConfigSaveDTO.getGpuResources())) { + List sysUserGpuConfigs = userConfigSaveDTO.getGpuResources().stream().map(x -> { + SysUserGpuConfigDTO sysUserGpuConfigDTO = new SysUserGpuConfigDTO(); + BeanUtils.copyProperties(x, sysUserGpuConfigDTO); + return sysUserGpuConfigDTO; + }).collect(Collectors.toList()); + gpuConfigDTO.setGpuResources(sysUserGpuConfigs); } + DataResponseBody gpuConfigDataResponse; + if (token == null) { + gpuConfigDataResponse = gpuConfigClient.updateGpuConfig(gpuConfigDTO); + } else { + gpuConfigDataResponse = gpuConfigTemplateClient.updateGpuConfig(gpuConfigDTO, AuthConst.ACCESS_TOKEN_PREFIX + token); + } + if (gpuConfigDataResponse == null || !gpuConfigDataResponse.succeed()) { + throw new BusinessException("k8s GPU型号配置更新失败"); + } + //创建或更新用户配置 UserConfig userConfig = new UserConfig(); - BeanUtils.copyProperties(userConfigDTO, userConfig); + 
BeanUtils.copyProperties(userConfigSaveDTO, userConfig); userConfigMapper.insertOrUpdate(userConfig); - // 封装用户配置 VO - UserConfigCreateVO userConfigCreateVO = new UserConfigCreateVO().setId(userConfig.getId()); - return userConfigCreateVO; - } + //创建或更新用户GPU配置 + //删除原有记录 + if (userGpuConfigMapper.selectCount(new QueryWrapper<>(new UserGpuConfig().setUserId(userConfigSaveDTO.getUserId()))) > 0) { + userGpuConfigMapper.delete(new QueryWrapper<>(new UserGpuConfig().setUserId(userConfigSaveDTO.getUserId()))); + } + if (!CollectionUtils.isEmpty(userConfigSaveDTO.getGpuResources())) { + List userGpuConfigs = userConfigSaveDTO.getGpuResources().stream().map(x -> + { + UserGpuConfig userGpuConfig = new UserGpuConfig(); + BeanUtils.copyProperties(x, userGpuConfig); + userGpuConfig.setUserId(userConfigSaveDTO.getUserId()); + return userGpuConfig; + }).collect(Collectors.toList()); + userGpuConfigMapper.insertBatchs(userGpuConfigs); + } + //更新quota中GPU的配额 + DataResponseBody dataResponseBody; + if (token == null) { + dataResponseBody = resourceQuotaClient.updateResourceQuota(resourceQuotaDTO); + } else { + dataResponseBody = resourceQuotaTemplateClient.updateResourceQuota(resourceQuotaDTO, AuthConst.ACCESS_TOKEN_PREFIX + token); + } + if (dataResponseBody == null || !dataResponseBody.succeed()) { + throw new BusinessException("k8s quota用户配置更新失败"); + } + } /** * 校验验证码 * - * @param loginCaptcha 验证码参数 - * @param uuid 验证码redis-key + * @param loginCaptcha 验证码参数 + * @param uuid 验证码redis-key */ private void validateCode(String loginCaptcha, String uuid) { // 验证码未输入 @@ -853,17 +1059,15 @@ public class UserServiceImpl extends ServiceImpl implements Us */ private void limitSendEmail(final String receiverMailAddress) { double count = redisUtils.hincr(UserConstant.USER_EMAIL_LIMIT_COUNT.concat(receiverMailAddress), receiverMailAddress, 1); - if (count > UserConstant.COUNT_SENT_EMAIL) { - LogUtil.error(LogEnum.SYS_ERR, "Email verification code cannot exceed three times , error:{}", 
UserConstant.COUNT_SENT_EMAIL); + if (count > emailSendLimit) { + LogUtil.error(LogEnum.SYS_ERR, "Email verification code cannot exceed three times , error:{}", emailSendLimit); throw new BusinessException(BaseErrorCodeEnum.SYSTEM_USER_EMAIL_CODE_CANNOT_EXCEED_TIMES.getCode(), BaseErrorCodeEnum.SYSTEM_USER_EMAIL_CODE_CANNOT_EXCEED_TIMES.getMsg()); } else { // 验证码次数凌晨清除 String concat = UserConstant.USER_EMAIL_LIMIT_COUNT.concat(receiverMailAddress); - - Long secondsNextEarlyMorning = DateUtil.getSecondTime(); - - redisUtils.expire(concat, secondsNextEarlyMorning); + Duration duration = Duration.between(LocalDateTime.now(), LocalDate.now().plusDays(1).atTime(0, 0, 0)); + redisUtils.expire(concat, duration.getSeconds()); } } @@ -889,8 +1093,8 @@ public class UserServiceImpl extends ServiceImpl implements Us /** * 校验 邮箱地址 和 验证码 * - * @param code 验证码 - * @param email 邮箱 + * @param code 验证码 + * @param email 邮箱 * @param codeRedisKey redis-key */ private void checkoutEmailAndCode(String code, String email, String codeRedisKey) { @@ -993,4 +1197,58 @@ public class UserServiceImpl extends ServiceImpl implements Us dto.setUserConfig(sysUserConfigDTO); return DataResponseFactory.success(dto); } + + /** + * 重置密码 + * + * @return 重置密码结果集 + */ + @Transactional(rollbackFor = Exception.class) + @Override + public DataResponseBody resetPassword(Long userId) { + + PasswordEncoder passwordEncoder = PasswordEncoderFactory.getPasswordEncoder(); + //重置为默认密码123456,加密密码 + String encode = passwordEncoder.encode(initialPassword); + userMapper.updateById(User.builder().id(userId).password(encode).lastPasswordResetTime(new Date()).build()); + return new DataResponseBody(); + } + + /** + * 获取用户分配的资源总量 + * + * @return 资源配额总量统计 + */ + @Override + public DataResponseBody getAllotResources() { + //获取内存、cpu分配总量 + UserAllotResourceVO userAllotResourceVO = userConfigMapper.selectResourceSum(); + //按型号获取gpu分配总量 + List gpuAllotVOList = userGpuConfigMapper.selectGpuAllotSum(); + List gpuAllotTotal = 
gpuAllotVOList.stream().map(allot -> Integer.valueOf(allot.getAllotTotal())).collect(Collectors.toList()); + userAllotResourceVO.setGpuAllotTotal(gpuAllotTotal.stream().reduce(Integer::sum).get()); + userAllotResourceVO.setGpuAllotList(gpuAllotVOList); + return new DataResponseBody(userAllotResourceVO); + } + + + /** + * 将user转换为userDTO,并且设置对应的用户组名 + * + * @return userDTO list + */ + private List convertToUserDTO(IPage users) { + List userDTOList = new ArrayList<>(); + if (CollectionUtil.isEmpty(users.getRecords())) { + return userDTOList; + } + userDTOList = userConvert.toDto(users.getRecords()); + for (UserDTO userDTO : userDTOList) { + String userGroupName = userMapper.queryUserGroupNameByUserId(userDTO.getId()); + userDTO.setUserGroupName(userGroupName); + } + + return userDTOList; + } + } diff --git a/dubhe-server/admin/src/main/resources/bootstrap.yml b/dubhe-server/admin/src/main/resources/bootstrap.yml index d5c9584..6f17ae9 100644 --- a/dubhe-server/admin/src/main/resources/bootstrap.yml +++ b/dubhe-server/admin/src/main/resources/bootstrap.yml @@ -10,8 +10,8 @@ spring: nacos: config: enabled: true - server-addr: 127.0.0.1:8848 - namespace: dubhe-server-cloud-prod + server-addr: 10.105.1.133:8848 + namespace: dubhe-server-cloud-dev shared-configs[0]: data-id: common-biz.yaml group: dubhe @@ -33,5 +33,5 @@ spring: enabled: true namespace: dubhe-server-cloud-dev group: dubhe - server-addr: 127.0.0.1:8848 + server-addr: 10.105.1.133:8848 diff --git a/dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml b/dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml index e0c0119..5a38d56 100644 --- a/dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml +++ b/dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml @@ -21,9 +21,6 @@ memory_limit, - - gpu_limit, - create_user_id, @@ -33,6 +30,9 @@ update_user_id, + + default_image_id, + deleted, @@ -50,9 +50,6 @@ #{memoryLimit}, - - #{gpuLimit}, - #{createUserId}, @@ -62,6 
+59,9 @@ #{updateUserId}, + + #{defaultImageId}, + #{deleted}, @@ -77,9 +77,6 @@ memory_limit = #{memoryLimit}, - - gpu_limit = #{gpuLimit}, - create_time = #{createTime}, @@ -92,9 +89,26 @@ update_user_id = #{updateUserId}, + + default_image_id = #{defaultImageId}, + deleted = #{deleted}, - \ No newline at end of file + + + diff --git a/dubhe-server/admin/src/main/resources/mapper/UserGpuConfigMapper.xml b/dubhe-server/admin/src/main/resources/mapper/UserGpuConfigMapper.xml new file mode 100644 index 0000000..b65b8c4 --- /dev/null +++ b/dubhe-server/admin/src/main/resources/mapper/UserGpuConfigMapper.xml @@ -0,0 +1,12 @@ + + + + + + + insert into user_gpu_config (user_id,gpu_type,gpu_model,k8s_label_key,gpu_limit) values + + (#{item.userId}, #{item.gpuType}, #{item.gpuModel}, #{item.k8sLabelKey}, #{item.gpuLimit}) + + + \ No newline at end of file diff --git a/dubhe-server/admin/src/main/resources/mapper/UserMapper.xml b/dubhe-server/admin/src/main/resources/mapper/UserMapper.xml index 7bd66ca..288681d 100644 --- a/dubhe-server/admin/src/main/resources/mapper/UserMapper.xml +++ b/dubhe-server/admin/src/main/resources/mapper/UserMapper.xml @@ -13,4 +13,9 @@ left join users_roles ur on ra.role_id = ur.role_id where ur.user_id = #{userId} and p.deleted=0; + + \ No newline at end of file diff --git a/dubhe-server/admin/src/test/java/org/dubhe/admin/AdminApplicationTests.java b/dubhe-server/admin/src/test/java/org/dubhe/admin/AdminApplicationTests.java index b145a05..5328e55 100644 --- a/dubhe-server/admin/src/test/java/org/dubhe/admin/AdminApplicationTests.java +++ b/dubhe-server/admin/src/test/java/org/dubhe/admin/AdminApplicationTests.java @@ -1,5 +1,6 @@ package org.dubhe.admin; +import com.alibaba.fastjson.JSON; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import org.dubhe.admin.domain.dto.DictCreateDTO; import org.dubhe.admin.domain.dto.DictDetailDTO; @@ -7,8 +8,10 @@ import org.dubhe.admin.domain.dto.DictDetailQueryDTO; import 
org.dubhe.admin.domain.entity.DictDetail; import org.dubhe.admin.rest.DictController; import org.dubhe.admin.service.DictDetailService; +import org.dubhe.admin.service.UserService; import org.dubhe.biz.base.utils.DateUtil; import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserConfigVO; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; @@ -28,6 +31,9 @@ public class AdminApplicationTests { @Autowired private DictDetailService dictDetailService; + @Autowired + private UserService userService; + /** * 字典分页查询 */ @@ -67,4 +73,10 @@ public class AdminApplicationTests { } + @Test + public void findUserConfig() { + UserConfigVO userConfig = userService.findUserConfig(1l); + System.out.println(JSON.toJSONString(userConfig)); + } + } diff --git a/dubhe-server/auth/src/main/resources/bootstrap.yml b/dubhe-server/auth/src/main/resources/bootstrap.yml index de12431..5361c38 100644 --- a/dubhe-server/auth/src/main/resources/bootstrap.yml +++ b/dubhe-server/auth/src/main/resources/bootstrap.yml @@ -11,8 +11,8 @@ spring: nacos: config: enabled: true - server-addr: 127.0.0.1:8848 - namespace: dubhe-server-cloud-prod + server-addr: 10.105.1.133:8848 + namespace: dubhe-server-cloud-dev shared-configs[0]: data-id: common-biz.yaml group: dubhe @@ -25,5 +25,5 @@ spring: enabled: true namespace: dubhe-server-cloud-dev group: dubhe - server-addr: 127.0.0.1:8848 + server-addr: 10.105.1.133:8848 diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java index a04c803..eb0543e 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java @@ -82,6 +82,10 @@ public 
class ApplicationNameConst { public final static String SERVER_DATA_DCM = "dubhe-data-dcm"; /** + * TADL + */ + public final static String SERVER_TADL = "dubhe-tadl"; + /** * k8s */ public final static String SERVER_K8S = "dubhe-k8s"; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java index 3a08ce2..5422976 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java @@ -54,10 +54,10 @@ public class AuthConst { * 默认匿名访问路径 */ public final static String[] DEFAULT_PERMIT_PATHS = {"/swagger**/**", "/webjars/**", "/v2/api-docs/**", "/doc.html/**", - "/users/findUserByUsername", "/auth/login", "/auth/code", - "/datasets/files/annotations/auto","/datasets/versions/**/convert/finish", "/datasets/enhance/finish", - "/auth/getCodeBySentEmail","/auth/userRegister","/ws/**", - StringConstant.RECYCLE_CALL_URI+"**" + "/users/findUserByUsername", "/auth/login", "/auth/code", "/auth/resetPassword", + "/datasets/files/annotations/auto", "/datasets/versions/**/convert/finish", "/datasets/enhance/finish", + "/auth/getCodeBySentEmail", "/auth/userRegister", "/ws/**", + StringConstant.RECYCLE_CALL_URI + "**" }; /** diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java index 8f406e8..0fa7ea0 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java @@ -68,6 +68,7 @@ public final class MagicNumConstant { public static final int TWO_THOUSAND_TWENTY_EIGHT = 2048; public static final int THREE_THOUSAND = 3000; public static final int 
FOUR_THOUSAND = 4000; + public static final int EIGHT_THOUSAND_ONE_HUNDRED_NINETY_TWO = 8192; public static final int NINE_THOUSAND = 9000; public static final int NINE_THOUSAND_NINE_HUNDRED_NINTY_NINE = 9999; public static final int TEN_THOUSAND = 10000; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java index 53306ea..99d06d2 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java @@ -32,6 +32,8 @@ public class NumberConstant { public final static int NUMBER_6 = 6; public final static int NUMBER_8 = 8; public final static int NUMBER_10 = 10; + public final static int NUMBER_12 = 12; + public final static int NUMBER_24 = 24; public final static int NUMBER_30 = 30; public final static int NUMBER_32 = 32; public final static int NUMBER_50 = 50; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java index d572156..407cb5e 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java @@ -132,6 +132,9 @@ public final class Permissions { public static final String USER_GROUP_EDIT_USER_ROLE = "hasAuthority('ROLE_system:userGroup:editUserRole')"; public static final String USER_GROUP_EDIT_USER_STATE = "hasAuthority('ROLE_system:userGroup:editUserState')"; public static final String USER_GROUP_DELETE_USER = "hasAuthority('ROLE_system:userGroup:deleteUser')"; + public static final String USER_GROUP_RESET_USER_PASSWORD ="hasAuthority('ROLE_system:userGroup:resetUserPassword')" ; + public static final String 
USER_GROUP_CONFIG_EDIT ="hasAuthority('ROLE_system:userGroup:editUserConfig')" ; + /** * 控制台:用户管理 @@ -142,6 +145,7 @@ public final class Permissions { public static final String USER_DOWNLOAD = "hasAuthority('ROLE_system:user:download')"; public static final String USER_CONFIG_EDIT = "hasAuthority('ROLE_system:user:configEdit')"; public static final String USER_RESOURCE_INFO = "hasAuthority('ROLE_system:user:resourceInfo')"; + public static final String USER_RESET_PASSWORD = "hasAuthority('ROLE_system:user:resetPassword')"; /** * 控制台:角色管理 @@ -203,6 +207,13 @@ public final class Permissions { public static final String SPECS_DELETE = "hasAuthority('ROLE_system:specs:delete')"; /** + * 控制台:GPU资源管理 + */ + public static final String GPU_CREATE = "hasAuthority('ROLE_system:gpu:create')"; + public static final String GPU_EDIT = "hasAuthority('ROLE_system:gpu:edit')"; + public static final String GPU_DELETE = "hasAuthority('ROLE_system:gpu:delete')"; + + /** * 专业版:终端 */ public static final String TERMINAL_CREATE = "hasAuthority('ROLE_terminal:specs:create')"; @@ -211,6 +222,8 @@ public final class Permissions { public static final String TERMINAL_DELETE = "hasAuthority('ROLE_terminal:specs:delete')"; public static final String TERMINAL_DETAIL = "hasAuthority('ROLE_terminal:specs:detail')"; public static final String TERMINAL_LIST = "hasAuthority('ROLE_terminal:specs:list')"; + public static final String TERMINAL_UPDATE = "hasAuthority('ROLE_terminal:specs:update')"; + private Permissions() { } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java index 5d35700..584e3b0 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java @@ -30,7 +30,7 @@ public final class StringConstant { public 
static final String REQUEST_METHOD_GET = "GET"; /** - * 字母、数字、英文横杠和下划线匹配 + * 字母、数字、汉字、英文横杠和下划线匹配 */ public static final String REGEXP_NAME = "^[a-zA-Z0-9\\-\\_\\u4e00-\\u9fa5]+$"; @@ -50,9 +50,42 @@ public final class StringConstant { public static final String REGEXP_SPECS = "^[a-zA-Z0-9\\-\\_\\s\\u4e00-\\u9fa5]+$"; /** + * GPU类型支持字母、数字、汉字、英文横杠、英文.号、空白字符和英文斜杠 + */ + public static final String REGEXP_GPU_TYPE = "^[a-zA-Z0-9\\-\\.\\s\\/\\u4e00-\\u9fa5]+$"; + + /** + * GPU型号支持小写字母、数字、英文横杠、英文.号和英文斜杠 + */ + public static final String REGEXP_GPU_MODEL = "^[a-z0-9\\-\\.\\/]+$"; + + /** + * k8s GPU资源名称支持小写字母、数字、英文横杠、英文.号和英文斜杠 + */ + public static final String REGEXP_K8S = "^[a-z0-9\\-\\.\\/]+$"; + + /** * 整数匹配 */ public static final Pattern PATTERN_NUM = Pattern.compile("^[-\\+]?[\\d]*$"); + /** + * 数字匹配 + */ + public static final String NUMBER ="(\\d+)"; + /** + * 整数匹配 + */ + public static final Pattern PATTERN_NUMBER = Pattern.compile("(\\d+)"); + /** + * 小数匹配 + */ + public static final Pattern PATTERN_DECIMAL = Pattern.compile("(\\d+\\.\\d+)"); + + + /** + * 描述内容支持字母、数字、汉字、英文横杠和下划线 + */ + public static final String REGEXP_DESCRIPTION = "^[a-zA-Z0-9\\-\\_\\u4e00-\\u9fa5]+$"; /** @@ -80,7 +113,7 @@ public final class StringConstant { public static final String K8S_CALLBACK_PATH_DEPLOYMENT = "/api/k8s/callback/deployment"; public static final String MULTIPART = "multipart/form-data"; - public static final String PIP_SITE_PACKAGE ="pip-site-package"; + public static final String PIP_SITE_PACKAGE = "pip-site-package"; /** * 分页内容 @@ -106,12 +139,20 @@ public final class StringConstant { public static final String START_LOW = "start"; public static final String END_LOW = "end"; public static final String STEP_LOW = "step"; + //1024*1024*1024 + public static final String MEM_UNIT = "1073741824"; /** * 任务缓存 */ - public static final String CACHE_TASK_ID ="task_id"; - public static final String CACHE_TASK_NAME ="task_name"; + public static final String CACHE_TASK_ID = "task_id"; 
+ public static final String CACHE_TASK_NAME = "task_name"; + + + /** + * python命令行参数格式 + */ + public static final String PYTHON_COMMAND_PATTERN = " --%s=%s"; private StringConstant() { diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java index db633fd..f973828 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java @@ -27,8 +27,10 @@ public class SymbolConstant { public static final String COLON = ":"; public static final String LINEBREAK = "\n"; public static final String BLANK = ""; + public static final String SPACE = " "; public static final String QUESTION = "?"; public static final String ZERO = "0"; + public static final String ONE = "1"; public static final String DOT = "."; public static final String TOKEN = "token"; public static final String GET = "get"; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java index f4eb296..76a9cf8 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java @@ -101,4 +101,19 @@ public class UserConstant { */ public final static Long DEFAULT_CREATE_USER_ID = 0L; + /** + * namespace前缀 + */ + public final static String NAMESPACE_PREFIX = "namespace-"; + + /** + * 查询7d内的用户资源用量 + */ + public final static String UNIT_7D = "7d"; + + /** + * 查询15d内的用户资源用量 + */ + public final static String UNIT_15D = "15d"; + } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/DeleteDTO.java 
b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/DeleteDTO.java new file mode 100644 index 0000000..4d2f92e --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/DeleteDTO.java @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.biz.base.dto; + +import lombok.Getter; +import lombok.Setter; + +import java.io.Serializable; +import java.util.Set; + +/** + * @description 删除用数据包 + * @date 2020-03-15 + */ +@Getter +@Setter +public class DeleteDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + private Set ids; + +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/GpuConfigDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/GpuConfigDTO.java new file mode 100644 index 0000000..47468fb --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/GpuConfigDTO.java @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.biz.base.dto; + +import lombok.Data; +import lombok.experimental.Accessors; + +import javax.validation.constraints.NotNull; +import java.io.Serializable; +import java.util.List; + +/** + * @description 用户配置DTO + * @date 2021-09-06 + */ +@Data +@Accessors(chain = true) +public class GpuConfigDTO implements Serializable { + private static final long serialVersionUID = 1L; + + /** + * 用户 ID + */ + @NotNull(message = "用户 ID 不能为空") + private Long userId; + + /** + * GPU 资源限制 + */ + private List gpuResources; +} \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ModelOptAlgorithmCreateDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ModelOptAlgorithmCreateDTO.java index df7014c..d3ed545 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ModelOptAlgorithmCreateDTO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ModelOptAlgorithmCreateDTO.java @@ -18,6 +18,7 @@ package org.dubhe.biz.base.dto; import lombok.Data; import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.constant.NumberConstant; import org.dubhe.biz.base.constant.StringConstant; import org.hibernate.validator.constraints.Length; @@ -42,7 +43,7 @@ public class ModelOptAlgorithmCreateDTO implements Serializable { private String name; @NotBlank(message = "代码目录不能为空") - @Length(max = NumberConstant.NUMBER_64, 
message = "代码目录-输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "代码目录-输入长度不能超过128个字符") private String path; } \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/NamespaceDeleteDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/NamespaceDeleteDTO.java new file mode 100644 index 0000000..de03492 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/NamespaceDeleteDTO.java @@ -0,0 +1,38 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.biz.base.dto; + +import lombok.Data; +import lombok.experimental.Accessors; + +import javax.validation.constraints.NotEmpty; +import java.io.Serializable; +import java.util.Set; + +/** + * @description Namespace deletion DTO + * @date 2021-11-29 + */ +@Data +@Accessors(chain = true) +public class NamespaceDeleteDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + @NotEmpty(message = "id不能为空") + Set ids; +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/PtImageQueryUrlDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/PtImageQueryUrlDTO.java index 9c7f7bc..0f307cb 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/PtImageQueryUrlDTO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/PtImageQueryUrlDTO.java @@ -37,4 +37,6 @@ public class PtImageQueryUrlDTO { private Integer projectType; + private Long id; + } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryResourceSpecsDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryResourceSpecsDTO.java index 08e48de..087fec1 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryResourceSpecsDTO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryResourceSpecsDTO.java @@ -45,10 +45,10 @@ public class QueryResourceSpecsDTO implements Serializable { private String specsName; /** - * 所属业务场景(0:通用,1:dubhe-notebook,2:dubhe-train,3:dubhe-serving) + * Business module (0: general, 1: dubhe-notebook, 2: dubhe-train, 3: dubhe-serving, 4: dubhe-tadl, 5: dubhe-optimize) */ @NotNull(message = "所属业务场景不能为空") @Min(value = MagicNumConstant.ZERO, message = "所属业务场景错误") - @Max(value = MagicNumConstant.THREE, message = "所属业务场景错误") + @Max(value = MagicNumConstant.FIVE, message = "所属业务场景错误") private Integer module; } \ No newline at
end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryUserK8sResourceDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryUserK8sResourceDTO.java new file mode 100644 index 0000000..6c70457 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/QueryUserK8sResourceDTO.java @@ -0,0 +1,121 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.biz.base.dto; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; + +import javax.validation.constraints.Min; +import javax.validation.constraints.NotEmpty; +import javax.validation.constraints.NotNull; +import java.io.Serializable; +import java.sql.Timestamp; +import java.util.List; + +/** + * @description Query entity for the k8s resources available to a user + * @date 2021-09-10 + */ +@Data +@Accessors(chain = true) +public class QueryUserK8sResourceDTO implements Serializable { + + private static final long serialVersionUID = 1L; + + @NotNull(message = "用户 ID 不能为空") + private Long userId; + + @NotNull(message = "节点个数") + private Integer resourcesPoolNode; + + /** + * GPU model (e.g. v100) + */ + private String gpuModel; + + /** + * k8s GPU resource label key (e.g. nvidia.com/gpu) + */ + private String k8sLabelKey; + + + /** + * Primary key ID + */ + @NotNull(message = "主键ID") + private Long id; + + /** + * Specification name + */ + private String specsName; + + /** + * Specification type (0 = CPU, 1 = GPU) + */ + @NotNull(message = "规格类型(0为CPU, 1为GPU)") + private Boolean resourcesPoolType; + + /** + * Business module (0: general, 1: dubhe-notebook, 2: dubhe-train, 3: dubhe-serving, 4: dubhe-tadl) + */ + private Integer module; + + /** + * Number of CPUs, unit: cores + */ + @NotNull(message = "CPU数量,单位:核") + private Integer cpuNum; + + /** + * Number of GPUs, unit: cores + */ + @NotNull(message = "GPU数量,单位:核") + private Integer gpuNum; + + /** + * Memory size, unit: M + */ + @NotNull(message = "内存大小,单位:M") + private Integer memNum; + + /** + * Workspace storage quota, unit: M + */ + private Integer workspaceRequest; + + /** + * Creator (user ID) + */ + private Long createUserId; + + /** + * Creation time + */ + private Timestamp createTime; + + /** + * Last updater (user ID) + */ + private Long updateUserId; + + /** + * Last update time + */ + private Timestamp updateTime; +} \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ResourceQuotaDTO.java
b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ResourceQuotaDTO.java new file mode 100644 index 0000000..d39d112 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/ResourceQuotaDTO.java @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.biz.base.dto; + +import lombok.Data; + +import javax.validation.constraints.NotNull; +import java.util.Map; + +/** + * @description DTO for k8s node resource isolation: a user ID with its CPU, memory and GPU limits + * @date 2021-07-21 + */ +@Data +public class ResourceQuotaDTO { + + @NotNull(message = "用户 ID 不能为空") + private Long userId; + + @NotNull(message = "CPU 资源限制配置不能为空") + private Integer cpuLimit; + + @NotNull(message = "内存资源限制配置不能为空") + private Integer memoryLimit; + + @NotNull(message = "GPU 资源限制配置不能为空") + private Map gpuLimit; +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java index 14296d4..a0675f7 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java @@ -18,7 +18,9 @@ package org.dubhe.biz.base.dto; import lombok.Data; import lombok.experimental.Accessors; + import
java.io.Serializable; +import java.util.List; /** * @description 系统用户配置 DTO @@ -26,7 +28,7 @@ import java.io.Serializable; */ @Data @Accessors(chain = true) -public class SysUserConfigDTO implements Serializable{ +public class SysUserConfigDTO implements Serializable { private static final long serialVersionUID = 1L; @@ -46,8 +48,12 @@ public class SysUserConfigDTO implements Serializable{ private Integer memoryLimit; /** - * GPU 资源限制配置 + * GPU resource limits */ - private Integer gpuLimit; + private List gpuResources; + /** + * ID of the user's default image + */ + private Long defaultImageId; } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserGpuConfigDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserGpuConfigDTO.java new file mode 100644 index 0000000..8c4bbd4 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserGpuConfigDTO.java @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.biz.base.dto; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.io.Serializable; + +/** + * @description System user GPU configuration DTO + * @date 2021-09-03 + */ +@Data +@Accessors(chain = true) +public class SysUserGpuConfigDTO implements Serializable { + private static final long serialVersionUID = 1L; + + /** + * GPU type (e.g. NVIDIA) + */ + private String gpuType; + + /** + * GPU model (e.g. v100) + */ + private String gpuModel; + + /** + * k8s GPU resource label key (e.g. nvidia.com/gpu) + */ + private String k8sLabelKey; + + /** + * GPU resource limit + */ + private Integer gpuLimit; +} \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserConfigSaveDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserConfigSaveDTO.java new file mode 100644 index 0000000..3833dbc --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserConfigSaveDTO.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.biz.base.dto; + +import lombok.Data; +import lombok.experimental.Accessors; + +import javax.validation.constraints.NotNull; +import java.io.Serializable; +import java.util.List; + +/** + * @description DTO for saving a user's configuration (notebook delete delay, CPU/memory limits, default image, GPU resources) + * @date 2021-7-1 + */ +@Data +@Accessors(chain = true) +public class UserConfigSaveDTO implements Serializable { + private static final long serialVersionUID = 1L; + + @NotNull(message = "用户 ID 不能为空") + private Long userId; + + @NotNull(message = "Notebook 延迟删除时间配置不能为空") + private Integer notebookDelayDeleteTime; + + @NotNull(message = "CPU 资源限制配置不能为空") + private Integer cpuLimit; + + @NotNull(message = "内存资源限制配置不能为空") + private Integer memoryLimit; + + private Long defaultImageId; + + private List gpuResources; +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserDTO.java index d10de2b..f4be1d1 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserDTO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserDTO.java @@ -45,6 +45,8 @@ public class UserDTO implements Serializable { private String remark; + private String userGroupName; + private Date lastPasswordResetTime; private Timestamp createTime; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserGpuConfigDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserGpuConfigDTO.java new file mode 100644 index 0000000..58f203c --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserGpuConfigDTO.java @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.biz.base.dto; + +import lombok.Data; +import lombok.experimental.Accessors; + +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.NotNull; +import java.io.Serializable; + +/** + * @description User GPU configuration DTO (GPU type, model, k8s label key and limit) + * @date 2021-7-1 + */ +@Data +@Accessors(chain = true) +public class UserGpuConfigDTO implements Serializable { + private static final long serialVersionUID = 1L; + + @NotBlank(message = "GPU类型") + private String gpuType; + + @NotBlank(message = "GPU型号") + private String gpuModel; + + @NotBlank(message = "k8s GPU资源标签key值") + private String k8sLabelKey; + + @NotNull(message = "GPU 资源限制配置不能为空") + private Integer gpuLimit; +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BaseErrorCodeEnum.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BaseErrorCodeEnum.java index f942768..c8a3aff 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BaseErrorCodeEnum.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BaseErrorCodeEnum.java @@ -43,7 +43,7 @@ public enum BaseErrorCodeEnum implements ErrorCode { SYSTEM_USER_ALREADY_REGISTER(20003, "账号已注册!"), SYSTEM_USER_REGISTER_EMAIL_INFO_EXPIRED(20004, "邮箱验证码已过期!"), SYSTEM_USER_EMAIL_ALREADY_EXISTS(20004, "该邮箱已被注册!"), - SYSTEM_USER_EMAIL_PASSWORD_ERROR(20005, "邮件密码错误!"), + SYSTEM_USER_EMAIL_PASSWORD_ERROR(20005, "密码错误!"), SYSTEM_USER_EMAIL_CODE_CANNOT_EXCEED_TIMES(20006,
"邮件发送不能超过三次!"), SYSTEM_USER_EMAIL_OR_CODE_ERROR(20007, "邮箱地址或验证码错误、请重新输入!"), SYSTEM_USER_IS_LOCKED(20008, "用户已锁定!"), diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BizEnum.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BizEnum.java index 00b7d78..c931273 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BizEnum.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BizEnum.java @@ -61,7 +61,10 @@ public enum BizEnum { * 专业版终端 */ TERMINAL("专业版终端", "terminal", 7), - ; + /** + * TADL service module + */ + TADL("TADL服务", "tadl", 8); /** * 业务模块名称 diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/AesUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/AesUtil.java index da90596..a2d737a 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/AesUtil.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/AesUtil.java @@ -35,8 +35,9 @@ public class AesUtil { private static final String AES = "AES"; + public static final String RESOURCE_KYE = "123456"; /* NOTE(review): key material is a hard-coded literal in source, and "KYE" looks like a typo of "KEY" - confirm both before relying on this constant */ - private AesUtil(){ + private AesUtil() { } @@ -50,7 +51,7 @@ public class AesUtil { * @throws NoSuchPaddingException * @throws InvalidKeyException */ - private static Cipher getCipher(int mode,String key) throws NoSuchAlgorithmException, NoSuchPaddingException, InvalidKeyException { + private static Cipher getCipher(int mode, String key) throws NoSuchAlgorithmException, NoSuchPaddingException, InvalidKeyException { MessageDigest md5Digest = MessageDigest.getInstance("MD5"); SecretKeySpec secretKeySpec = new SecretKeySpec(md5Digest.digest(key.getBytes(StandardCharsets.UTF_8)), AES); Cipher cipher = Cipher.getInstance(AES); @@ -67,7 +68,7 @@ public class AesUtil { */ public static String encrypt(String data, String key) { try { - Cipher cipher = getCipher(Cipher.ENCRYPT_MODE,key); + Cipher cipher =
getCipher(Cipher.ENCRYPT_MODE, key); byte[] content = data.getBytes(StandardCharsets.UTF_8); return new String(HexUtil.encodeHex(cipher.doFinal(content), false)); } catch (Exception e) { @@ -79,11 +80,11 @@ public class AesUtil { * 解密 * @param hexData 十六进制密文 * @param key 秘钥 - * @return String 密文 + * @return String decrypted plaintext (UTF-8) */ public static String decrypt(String hexData, String key) { try { - Cipher cipher = getCipher(Cipher.DECRYPT_MODE,key); + Cipher cipher = getCipher(Cipher.DECRYPT_MODE, key); byte[] content = HexUtil.decodeHex(hexData); return new String(cipher.doFinal(content), StandardCharsets.UTF_8); } catch (Exception e) { diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/CommandUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/CommandUtil.java new file mode 100644 index 0000000..d491bea --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/CommandUtil.java @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ + +package org.dubhe.biz.base.utils; + +import com.alibaba.fastjson.JSONObject; + +import static org.dubhe.biz.base.constant.StringConstant.PYTHON_COMMAND_PATTERN; + +/** + * @description Command-line utility class + * @date 2021-09-22 + */ +public class CommandUtil { + /** + * Builds a python run command by appending one PYTHON_COMMAND_PATTERN-formatted segment per runParams entry to runCommand + * NOTE(review): keys and values go into String.format as-is - confirm callers sanitize them before the result reaches a shell + * @param runCommand command text placed at the start of the result + * @param runParams key/value pairs to append; null or empty leaves runCommand unchanged + * @return the assembled command string + */ + public static String buildPythonCommand(String runCommand, JSONObject runParams) { + StringBuilder sb = new StringBuilder(); + sb.append(runCommand); + if (null != runParams && !runParams.isEmpty()) { + runParams.forEach((k, v) -> sb.append(String.format(PYTHON_COMMAND_PATTERN, k, v))); + } + return sb.toString(); + } +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/DateUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/DateUtil.java index 63d2e14..0ec3348 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/DateUtil.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/DateUtil.java @@ -19,6 +19,7 @@ package org.dubhe.biz.base.utils; import java.sql.Timestamp; import java.text.DateFormat; +import java.text.ParseException; import java.text.SimpleDateFormat; import java.time.Instant; import java.time.LocalDate; @@ -125,4 +126,24 @@ public class DateUtil { return Timestamp.valueOf(sdf.format(calendar.getTime())); } + + /** + * Adds 8 hours to a timestamp and reformats it (NOTE(review): the example is a Z-suffixed value shifted to UTC+8, the opposite of the direction in the method name - confirm intent) + * Input: 2021-11-17T08:50:23Z + * Returns: 2021-11-17 16:50:23 + * + * @param time timestamp in yyyy-MM-dd'T'HH:mm:ss'Z' form; the quoted Z is matched literally, not read as a zone + * @return the shifted time formatted as yyyy-MM-dd HH:mm:ss + * @throws ParseException if time does not match the input pattern + */ + public static String convertCST2UTCDate(String time) throws ParseException { + SimpleDateFormat parseFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + Date dateTime = parseFormat.parse(time); + + Calendar calendar = Calendar.getInstance(); + calendar.setTime(dateTime); + calendar.set(Calendar.HOUR,calendar.get(Calendar.HOUR) + 8); + SimpleDateFormat simpleDateFormat = new
SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + return simpleDateFormat.format(calendar.getTime()); + } } \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MapUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MapUtil.java new file mode 100644 index 0000000..6b578b5 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MapUtil.java @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + + +package org.dubhe.biz.base.utils; + +import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson.TypeReference; + +import java.util.HashMap; +import java.util.Map; + +/** + * @description Map utility class; convertJsonObject returns an empty HashMap for a null JSONObject, otherwise re-parses object.toJSONString() into a Map + * @date 2021-09-22 + */ +public class MapUtil { + public static Map convertJsonObject(JSONObject object) { + if (object == null) { + return new HashMap<>(); + } + return JSONObject.parseObject(object.toJSONString(), + new TypeReference>(){}); + } +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MathUtils.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MathUtils.java index 2d779c5..2ba57a5 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MathUtils.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/MathUtils.java @@ -72,4 +72,5 @@ public class MathUtils { return Float.valueOf(num1) / Float.valueOf(num2); } } + } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/PtModelUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/PtModelUtil.java index 8a521a6..77dbe55 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/PtModelUtil.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/PtModelUtil.java @@ -54,6 +54,8 @@ public class PtModelUtil { public static final int MODEL_OPTIMIZATION = 2; + public static final int AUTOMATIC_MACHINE_LEARNING = 4; + public static final int RANDOM_LENGTH = 4; } \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/RegexUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/RegexUtil.java index 99d00da..822a2c7 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/RegexUtil.java +++
b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/RegexUtil.java @@ -17,6 +17,7 @@ package org.dubhe.biz.base.utils; +import cn.hutool.core.util.StrUtil; import lombok.extern.slf4j.Slf4j; import java.util.regex.Matcher; @@ -30,21 +31,23 @@ import java.util.regex.Pattern; public class RegexUtil { private static final String DIGIT = "^[0-9]*$"; private static final String FLOAT = "^[-+]?[0-9]*\\.?[0-9]+$"; + private static final String REGEX = "[^0-9]"; + /** * str待匹配文本 * regex 正则表达式 *返回str中匹配regex的第一个子串 */ - public static String getMatcher(String str,String regex) { - try{ - if (StringUtils.isEmpty(str) || StringUtils.isEmpty(regex)){ + public static String getMatcher(String str, String regex) { + try { + if (StringUtils.isEmpty(str) || StringUtils.isEmpty(regex)) { return ""; } Pattern p = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); Matcher matcher = p.matcher(str); matcher.find(); return matcher.group(); - }catch (IllegalStateException e){ + } catch (IllegalStateException e) { log.error(e.getMessage(), e); return ""; } @@ -55,8 +58,8 @@ public class RegexUtil { * @param str * @return */ - public static boolean isDigits(String str){ - if (StringUtils.isEmpty(str)){ + public static boolean isDigits(String str) { + if (StringUtils.isEmpty(str)) { return false; } return str.matches(DIGIT); @@ -67,10 +70,25 @@ public class RegexUtil { * @param str * @return */ - public static boolean isFloat(String str){ - if (StringUtils.isEmpty(str)){ + public static boolean isFloat(String str) { + if (StringUtils.isEmpty(str)) { return false; } return str.matches(FLOAT); } + + /** + * Removes every character that is not 0-9 from str and parses the remaining digits as an Integer + * NOTE(review): when the StrUtil.isNotEmpty check fails or str holds no digits, the remainder is "" and Integer.valueOf throws NumberFormatException + * @param str source string + * @return Integer parsed from the digits kept from str + */ + public static Integer tranferRegEx(String str) { + String res = ""; + if (StrUtil.isNotEmpty(str)) { + Matcher matcher = Pattern.compile(REGEX).matcher(str); + res = matcher.replaceAll("").trim(); + } + return Integer.valueOf(res); + } } diff --git
a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java index 8c1d9c4..cfc263d 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java @@ -57,4 +57,11 @@ public class ResultUtil { throw new BusinessException(String.format(errorMessageTemplate, params)); } } + + /** Throws BusinessException, with errorMessageTemplate formatted by params, unless object equals Boolean.TRUE (null and false both fail). */ + public static void isTrue(Boolean object, String errorMessageTemplate, Object... params) { + if (!Boolean.TRUE.equals(object)) { + throw new BusinessException(String.format(errorMessageTemplate, params)); + } + } } \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/StringUtils.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/StringUtils.java index 9905cfd..c15139e 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/StringUtils.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/StringUtils.java @@ -17,6 +17,7 @@ package org.dubhe.biz.base.utils; +import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSON; import eu.bitwalker.useragentutils.Browser; import eu.bitwalker.useragentutils.UserAgent; @@ -414,4 +415,11 @@ public class StringUtils extends org.apache.commons.lang3.StringUtils { } return null; } + /** Returns arg1 parsed as a double, divided by arg2 and passed through Math.round, as a string; returns "0" when StrUtil.isEmpty(arg1) or arg2 is 0. NOTE(review): nothing here multiplies by 100 despite the name - confirm callers pre-scale. */ + public static String toNumPrecent(String arg1, int arg2) { + if (StrUtil.isEmpty(arg1) || arg2 == 0) { + return "0"; + } + return String.valueOf(Math.round(Double.parseDouble(arg1) / arg2)); + } } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/GpuAllotVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/GpuAllotVO.java new file mode 100644 index 0000000..f2674c8 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/GpuAllotVO.java @@
-0,0 +1,47 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.biz.base.vo; + +import lombok.Data; + +/** + * @description Resource quota for a specific GPU model + * @date 2021-11-15 + */ +@Data +public class GpuAllotVO { + + /** + * User name + */ + private String userName; + + /** + * GPU model + */ + private String gpuModel; + + /** + * Usage rate of the allocated GPUs (NOTE(review): the field is named allotTotal - confirm whether it holds a rate or an allocated count) + */ + private int allotTotal; + + /** + * Total number of GPUs + */ + private int total; +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelBranchQueryVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelBranchQueryVO.java index 25c3685..7f7e5b8 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelBranchQueryVO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelBranchQueryVO.java @@ -94,6 +94,11 @@ public class PtModelBranchQueryVO implements Serializable { private Long createUserId; /** + * Creator's user name + */ + private String createUserName; + + /** * 修改人ID */ private Long updateUserId; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelInfoQueryVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelInfoQueryVO.java index 5dd37d2..fe08c00 100644 ---
a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelInfoQueryVO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/PtModelInfoQueryVO.java @@ -81,6 +81,11 @@ public class PtModelInfoQueryVO implements Serializable { private Integer totalNum; /** + * Number of valid versions + */ + private Integer versionNum; + + /** * 团队ID */ private Integer teamId; @@ -91,6 +96,12 @@ public class PtModelInfoQueryVO implements Serializable { private Long createUserId; /** + * Creator's user name + */ + private String createUserName; + + + /** * 修改人ID */ private Long updateUserId; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryResourceSpecsVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryResourceSpecsVO.java index bac1c42..76782fb 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryResourceSpecsVO.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryResourceSpecsVO.java @@ -48,7 +48,7 @@ public class QueryResourceSpecsVO implements Serializable { private Boolean resourcesPoolType; /** - *所属业务场景 + * Business module (0: general, 1: dubhe-notebook, 2: dubhe-train, 3: dubhe-serving, 4: dubhe-tadl) */ private Integer module; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryUserResourceSpecsVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryUserResourceSpecsVO.java new file mode 100644 index 0000000..aa73a13 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/QueryUserResourceSpecsVO.java @@ -0,0 +1,104 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.biz.base.vo; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.io.Serializable; +import java.sql.Timestamp; + +/** + * @description Result wrapper for user resource specification queries + * @date 2021-06-02 + */ +@Data +@Accessors(chain = true) +public class QueryUserResourceSpecsVO implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Primary key ID + */ + private Long id; + + /** + * Specification name + */ + private String specsName; + + /** + * Specification type (0 = CPU, 1 = GPU) + */ + private Boolean resourcesPoolType; + + /** + * Business module (0: general, 1: dubhe-notebook, 2: dubhe-train, 3: dubhe-serving, 4: dubhe-tadl) + */ + private Integer module; + + /** + * Number of CPUs, unit: cores + */ + private Integer cpuNum; + + /** + * Number of GPUs, unit: cores + */ + private Integer gpuNum; + + /** + * Memory size, unit: M + */ + private Integer memNum; + + /** + * Workspace storage quota, unit: M + */ + private Integer workspaceRequest; + + /** + * Creator (user ID) + */ + private Long createUserId; + + /** + * Creation time + */ + private Timestamp createTime; + + /** + * Last updater (user ID) + */ + private Long updateUserId; + + /** + * Last update time + */ + private Timestamp updateTime; + + /** + * Whether the resource is valid + */ + private Boolean valid; + + /** + * Resource message + */ + private String message; +} \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotResourceVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotResourceVO.java new file mode 100644 index 0000000..11319ac --- /dev/null +++
b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotResourceVO.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.biz.base.vo; + +import lombok.Data; + +import java.util.List; + +/** + * @description 用户已分配资源(内存、CPU、GPU)汇总VO + * @date 2021-11-15 + */ +@Data +public class UserAllotResourceVO { + + /** + * 已分配给用户的内存 + */ + private int memoryAllotTotal; + + /** + * 已分配给用户的CPU + */ + private int cpuAllotTotal; + + /** + * 已分配给用户的GPU + */ + private int gpuAllotTotal; + + /** + * 已分配给用户的GPU具体型号配额列表 + */ + private List gpuAllotList; + +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotVO.java new file mode 100644 index 0000000..1a857cb --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserAllotVO.java @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.biz.base.vo; + +import lombok.Data; + +import java.util.List; + +/** + * @description 用户资源配额信息VO + * @date 2021-11-17 + */ + +@Data +public class UserAllotVO { + + /** + * 用户Id + */ + private Long userId; + + /** + * 用户名 + */ + private String userName; + + /** + * 资源配额总量 + */ + private String allotTotal; + + /** + * GPU具体型号配额 + */ + private List gpuAllotList; + +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserConfigVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserConfigVO.java new file mode 100644 index 0000000..75edde1 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserConfigVO.java @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.biz.base.vo; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.io.Serializable; +import java.util.List; + +/** + * @description 用户配置 VO + * @date 2021-7-1 + */ +@Data +@Accessors(chain = true) +public class UserConfigVO implements Serializable { + private static final long serialVersionUID = 1L; + + private Long userId; + + private Integer notebookDelayDeleteTime; + + private Integer cpuLimit; + + private Integer memoryLimit; + + private List gpuResources; +} diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserGpuConfigVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserGpuConfigVO.java new file mode 100644 index 0000000..9ab5ff1 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/UserGpuConfigVO.java @@ -0,0 +1,60 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.biz.base.vo; + +import lombok.Data; +import lombok.experimental.Accessors; +import javax.validation.constraints.NotNull; + +/** + * @description 用户GPU配置实体 + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +public class UserGpuConfigVO { + + /** + * 主键ID + */ + private Long id; + + /** + * 用户id + */ + private Long userId; + + /** + * GPU类型(例如:NVIDIA) + */ + private String gpuType; + + /** + * GPU型号(例如:v100) + */ + private String gpuModel; + + /** + * k8s GPU资源标签key值(例如:nvidia.com/gpu) + */ + private String k8sLabelKey; + + /** + * 用户显卡资源限制配置,单位:卡 + */ + private Integer gpuLimit; +} \ No newline at end of file diff --git a/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/BaseLogQuery.java b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/BaseLogQuery.java new file mode 100644 index 0000000..66ac107 --- /dev/null +++ b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/BaseLogQuery.java @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.biz.db.base; + +import lombok.Data; +import lombok.experimental.Accessors; + +/** + * @description 日志基类 + * @date 2021-03-11 + */ +@Data +@Accessors(chain = true) +public class BaseLogQuery { + + private Integer startLine; + + private Integer lines; +} diff --git a/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/PageQueryBase.java b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/PageQueryBase.java index e1208b3..e438330 100644 --- a/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/PageQueryBase.java +++ b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/base/PageQueryBase.java @@ -49,6 +49,7 @@ public class PageQueryBase { */ private String order; + public Page toPage() { Page page = new Page(); if (this.current != null) { diff --git a/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/entity/PageResult.java b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/entity/PageResult.java new file mode 100644 index 0000000..2959341 --- /dev/null +++ b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/entity/PageResult.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + + +package org.dubhe.biz.db.entity; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import lombok.Data; + +import java.util.List; + +/** + * @description 分页查询结果封装类(结果列表 + 分页信息) + * @date 2021-10-12 + */ +@Data +public class PageResult { + + private List result; + Page page; + + public PageResult(IPage iPage, List data) { + page = new Page(); + page.current = iPage.getCurrent(); + page.size = iPage.getSize(); + page.total = iPage.getTotal(); + result = data; + } + + @Data + class Page { + private long current; + private long size; + private long total; + } +} diff --git a/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/utils/PageUtil.java b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/utils/PageUtil.java index c11cb04..39c6a83 100644 --- a/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/utils/PageUtil.java +++ b/dubhe-server/common-biz/db/src/main/java/org/dubhe/biz/db/utils/PageUtil.java @@ -61,14 +61,14 @@ public class PageUtil extends cn.hutool.core.util.PageUtil { * 自定义分页 */ public static Map toPage(IPage page, Collection data) { - Map map = new LinkedHashMap<>(2); + Map map = new LinkedHashMap<>(4); map.put("result", data); map.put("page", buildPagination(page)); return map; } private static Map buildPagination(IPage page) { - Map map = new HashMap<>(2); + Map map = new HashMap<>(4); map.put("current", page.getCurrent()); map.put("size", page.getSize()); map.put("total", page.getTotal()); diff --git a/dubhe-server/common-biz/file/pom.xml b/dubhe-server/common-biz/file/pom.xml index 4f411e3..83422e8 100644 --- a/dubhe-server/common-biz/file/pom.xml +++ b/dubhe-server/common-biz/file/pom.xml @@ -72,6 +72,16 @@ org.apache.poi poi-ooxml + + junit + junit + 4.13.1 + + + io.minio + minio + 7.0.2 + diff --git a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/impl/ShellFileStoreApiImpl.java
b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/impl/ShellFileStoreApiImpl.java index 74fc431..82ddb46 100644 --- a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/impl/ShellFileStoreApiImpl.java +++ b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/impl/ShellFileStoreApiImpl.java @@ -62,14 +62,14 @@ public class ShellFileStoreApiImpl implements FileStoreApi { * 删除服务器无效文件(大文件) * 示例:rsync --delete-before -d /空目录 /需要回收的源目录 */ - public static final String DEL_COMMAND = "ssh %s@%s \"mkdir -p %s; rsync --delete-before -d %s %s; rmdir %s %s\""; + public static final String DEL_COMMAND = "mkdir -p %s; rsync --delete-before -d %s %s; rmdir %s %s"; /** * 拷贝文件并重命名 */ public static final String COPY_RENAME_COMMAND = "ssh %s@%s \"cp -rf %s %s && echo success\""; - /** + /** * 文件复制 * rsync -avP --exclude={'dir'} sourcePath targetPath 将原路径复制到目标路径下,过滤dir目录 * 示例:rsync -avP --exclude={'V0001'} /root/test/ /root/test2/ @@ -264,4 +264,4 @@ public class ShellFileStoreApiImpl implements FileStoreApi { public void filterFilePageWithPath(FilePageDTO filePageDTO) { } -} \ No newline at end of file +} diff --git a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/IOUtil.java b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/IOUtil.java index 51b09e6..1070dde 100644 --- a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/IOUtil.java +++ b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/IOUtil.java @@ -20,7 +20,10 @@ import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import java.io.Closeable; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; /** * @description IO流操作工具类 @@ -44,4 +47,26 @@ public class IOUtil { } } } + + /** + * 将input流转换为文件 + * + * @param is 输入流 调用方负责关闭 + * @param targetFile 目标文件 + */ + public static void copy(InputStream is, File targetFile) { + try 
(FileOutputStream fos = new FileOutputStream(targetFile)) { + byte[] b = new byte[1024]; + int readCount = is.read(b); + while (readCount != -1) { + // 写入数据 + fos.write(b, 0, readCount); + readCount = is.read(b); + } + is.close(); + fos.flush(); + } catch (IOException e) { + LogUtil.error(LogEnum.IO_UTIL,"copy file error:【{}】", e); + } + } } diff --git a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/MinioUtil.java b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/MinioUtil.java index c73e6ca..34ef613 100644 --- a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/MinioUtil.java +++ b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/utils/MinioUtil.java @@ -18,6 +18,7 @@ package org.dubhe.biz.file.utils; import cn.hutool.core.io.IoUtil; +import com.alibaba.fastjson.JSONObject; import io.minio.CopyConditions; import io.minio.MinioClient; import io.minio.PutObjectOptions; @@ -40,6 +41,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * @description Minio工具类 @@ -280,4 +283,33 @@ public class MinioUtil { } } + /** + * 生成给HTTP PUT请求用的presigned URLs。浏览器/移动端的客户端可以用这个URL进行上传, + * 即使其所在的存储桶是私有的。这个presigned URL可以设置一个失效时间,默认值是7天 + * + * @param bucketName 存储桶名称 + * @param objectNames 存储桶里的对象名称列表(JSON数组字符串) + * @param expires 失效时间(以秒为单位),默认是7天,不得大于七天 + * @return JSONObject 包含预签名URL列表(preUrls)和存储桶名称(bucketName) + */ + public JSONObject getEncryptedPutUrls(String bucketName,String objectNames, Integer expires) { + List filePaths = JSONObject.parseObject(objectNames, List.class); + List urls = new ArrayList<>(); + filePaths.stream().forEach(filePath->{ + if (StringUtils.isEmpty(filePath)) { + throw new BusinessException("filePath cannot be empty"); + } + try { + urls.add(client.presignedPutObject(bucketName, filePath, expires)); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_DATASET, e.getMessage()); + throw new
BusinessException("MinIO an error occurred, please contact the administrator"); + } + }); + JSONObject jsonObject = new JSONObject(); + jsonObject.put("preUrls",urls); + jsonObject.put("bucketName", bucketName); + return jsonObject; + } + } diff --git a/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java b/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java index 6106118..e0d3e40 100644 --- a/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java +++ b/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java @@ -79,7 +79,9 @@ public enum LogEnum { //云端Serving SERVING, //专业版终端 - TERMINAL; + TERMINAL, + //tadl + TADL; /** * 判断日志类型不能为空 diff --git a/dubhe-server/common-biz/log/src/main/resources/logback.xml b/dubhe-server/common-biz/log/src/main/resources/logback.xml index bc8007d..c19f544 100644 --- a/dubhe-server/common-biz/log/src/main/resources/logback.xml +++ b/dubhe-server/common-biz/log/src/main/resources/logback.xml @@ -259,4 +259,4 @@ - + \ No newline at end of file diff --git a/dubhe-server/common-biz/redis/pom.xml b/dubhe-server/common-biz/redis/pom.xml index 60960da..d672e59 100644 --- a/dubhe-server/common-biz/redis/pom.xml +++ b/dubhe-server/common-biz/redis/pom.xml @@ -24,6 +24,11 @@ com.liferay com.fasterxml.jackson.databind + + com.amazonaws + aws-java-sdk + 1.12.35 + diff --git a/dubhe-server/common-biz/redis/src/main/java/org/dubhe/biz/redis/utils/RedisUtils.java b/dubhe-server/common-biz/redis/src/main/java/org/dubhe/biz/redis/utils/RedisUtils.java index 16e8d09..500b02c 100644 --- a/dubhe-server/common-biz/redis/src/main/java/org/dubhe/biz/redis/utils/RedisUtils.java +++ b/dubhe-server/common-biz/redis/src/main/java/org/dubhe/biz/redis/utils/RedisUtils.java @@ -24,10 +24,7 @@ import org.dubhe.biz.log.utils.LogUtil; import org.springframework.beans.factory.annotation.Value; import org.springframework.data.redis.connection.RedisConnection; import 
org.springframework.data.redis.connection.RedisConnectionFactory; -import org.springframework.data.redis.core.Cursor; -import org.springframework.data.redis.core.RedisConnectionUtils; -import org.springframework.data.redis.core.RedisTemplate; -import org.springframework.data.redis.core.ScanOptions; +import org.springframework.data.redis.core.*; import org.springframework.data.redis.core.script.DefaultRedisScript; import org.springframework.data.redis.core.script.RedisScript; import org.springframework.data.redis.serializer.Jackson2JsonRedisSerializer; @@ -670,6 +667,40 @@ public class RedisUtils { return zRangeByScorePop( key,0, max,0,1); } + /** + * 根据键获取score值为 min 到 max 之间的所有 member 和 score(注意:实现调用的是 opsForZSet().rangeWithScores,其区间参数疑为索引而非 score,待确认) + * @param key 键 + * @param min score最小值 + * @param max score最大值 + * @return 匹配的 member 及其 score 集合;执行异常时返回 null + */ + public Set> zRangeByScoreWithScores(String key, Long min, Long max){ + try { + return redisTemplate.opsForZSet().rangeWithScores(key, min, max); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_DATASET, "RedisUtils rangeWithScores key {} error:{}", key, e.getMessage(), e); + return null; + } + } + + /** + * 根据 key 和 member 移除元素 + * @param key 键 + * @param member 要移除的成员 + * @return key 为空、member 为 null 或执行异常时返回 false,否则返回 true + */ + public Boolean zRem(String key,Object member){ + try{ + if (StringUtils.isEmpty(key) || null == member){ + return false; + } + redisTemplate.opsForZSet().remove(key,member); + return true; + }catch (Exception e){ + LogUtil.error(LogEnum.REDIS, "RedisUtils zrem key {} member {} error:{}", key, member, e); + return false; + } + } // ===============================list================================= diff --git a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java index d107715..1f29347 100644 --- a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java +++
b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java @@ -16,12 +16,17 @@ */ package org.dubhe.cloud.authconfig.service; +import feign.Param; import org.dubhe.biz.base.constant.ApplicationNameConst; import org.dubhe.biz.base.context.UserContext; +import org.dubhe.biz.base.dto.UserConfigSaveDTO; import org.dubhe.biz.base.dto.UserDTO; import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserAllotResourceVO; +import org.dubhe.biz.base.vo.UserConfigVO; import org.springframework.cloud.openfeign.FeignClient; import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PutMapping; import org.springframework.web.bind.annotation.RequestParam; import java.util.List; @@ -47,4 +52,29 @@ public interface AdminClient { @GetMapping(value = "/users/findByIds") DataResponseBody> getUserList(@RequestParam(value = "ids") List ids); + + /** + * 更改用户配置 + * + * @param userConfigSaveDTO 用户配置DTO + */ + @PutMapping(value = "/users/setUserConfig") + DataResponseBody setUserConfig(@Param("userConfigSaveDTO") UserConfigSaveDTO userConfigSaveDTO); + + /** + * 获取用户配置 + * + * @param userId 用户ID + */ + @GetMapping(value = "/users/getUserConfig") + DataResponseBody getUserConfig(@RequestParam(value = "userId") Long userId); + + /** + * 获取用户资源配额总量 + * + * @return UserAllotResourceVO 用户资源配额 + */ + @GetMapping(value = "/users/userAllot") + DataResponseBody getUserAllotTotal(); + } diff --git a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClientFallback.java b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClientFallback.java index 8bb550e..a6e55a9 100644 --- a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClientFallback.java +++ b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClientFallback.java @@ 
-16,8 +16,10 @@ */ package org.dubhe.cloud.authconfig.service; +import org.dubhe.biz.base.dto.UserConfigSaveDTO; import org.dubhe.biz.base.dto.UserDTO; import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.UserAllotResourceVO; import org.dubhe.biz.dataresponse.factory.DataResponseFactory; import org.springframework.stereotype.Component; @@ -44,4 +46,19 @@ public class AdminClientFallback implements AdminClient { return DataResponseFactory.failed("call user controller to get users error"); } -} \ No newline at end of file + @Override + public DataResponseBody setUserConfig(UserConfigSaveDTO userConfigCreateOrUpdateDTO) { + return DataResponseFactory.failed("call admin server setUserConfig error"); + } + + @Override + public DataResponseBody getUserConfig(Long userId) { + return DataResponseFactory.failed("call admin server getUserConfig error"); + } + + + @Override + public DataResponseBody getUserAllotTotal() { + return DataResponseFactory.failed("call admin server getUserAllotTotal error "); + } +} diff --git a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/impl/OAuth2UserContextServiceImpl.java b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/impl/OAuth2UserContextServiceImpl.java index 390f319..13297f7 100644 --- a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/impl/OAuth2UserContextServiceImpl.java +++ b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/impl/OAuth2UserContextServiceImpl.java @@ -32,12 +32,12 @@ public class OAuth2UserContextServiceImpl implements UserContextService { @Override public UserContext getCurUser() { JwtUserDTO jwtUserDTO = JwtUtils.getCurUser(); - return jwtUserDTO == null?null:jwtUserDTO.getUser(); + return jwtUserDTO == null ? 
null : jwtUserDTO.getUser(); } @Override public Long getCurUserId() { UserContext userContext = getCurUser(); - return userContext == null?null:userContext.getId(); + return userContext == null ? null : userContext.getId(); } } diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-ai.yaml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-ai.yaml new file mode 100644 index 0000000..64419c0 --- /dev/null +++ b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-ai.yaml @@ -0,0 +1,12 @@ +spring: + cloud: + nacos: + username: nacos + password: Tianshu + context-path: /nacos + config: + namespace: dubhe-server-cloud-ai + server-addr: 10.105.1.132:8848 + discovery: + namespace: dubhe-server-cloud-ai + server-addr: 10.105.1.132:8848 diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-open-dev.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-open-dev.yml new file mode 100644 index 0000000..f70f2c4 --- /dev/null +++ b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-open-dev.yml @@ -0,0 +1,12 @@ +spring: + cloud: + nacos: + username: nacos + password: Tianshu + context-path: /nacos + config: + namespace: dubhe-server-cloud-open-dev + server-addr: 10.105.1.132:8848 + discovery: + namespace: dubhe-server-cloud-open-dev + server-addr: 10.105.1.132:8848 \ No newline at end of file diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-opendev.yaml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-opendev.yaml new file mode 100644 index 0000000..b53c1a1 --- /dev/null +++ b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-opendev.yaml @@ -0,0 +1,12 @@ +spring: + cloud: + nacos: + username: nacos + password: Tianshu + context-path: /nacos + config: + namespace: dubhe-server-cloud-open-dev + server-addr: 10.105.1.132:8848 
+ discovery: + namespace: dubhe-server-cloud-open-dev + server-addr: 10.105.1.132:8848 diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml index b8cc6ee..9584cc0 100644 --- a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml +++ b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml @@ -1,9 +1,12 @@ spring: cloud: nacos: + username: nacos + password: Tianshu + context-path: /nacos config: namespace: dubhe-server-cloud-pre - server-addr: 10.105.1.133:8848 + server-addr: 10.105.1.132:8848 discovery: namespace: dubhe-server-cloud-pre - server-addr: 10.105.1.133:8848 + server-addr: 10.105.1.132:8848 diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml new file mode 100644 index 0000000..82e55d7 --- /dev/null +++ b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml @@ -0,0 +1,12 @@ +spring: + cloud: + nacos: + username: nacos + password: Tianshu + context-path: /nacos + config: + namespace: dubhe-server-cloud-prod + server-addr: 10.105.1.132:8848 + discovery: + namespace: dubhe-server-cloud-prod + server-addr: 10.105.1.132:8848 diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java index e583a57..68c829e 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java @@ -74,6 +74,7 @@ public class DockerCallbackTool { LogUtil.info(LogEnum.TERMINAL, "{} sendPushCallback {} count {} status:{}", url, dockerPushCallbackDTO,count,httpResponse.getStatus()); //重试 if (HttpStatus.HTTP_OK != 
httpResponse.getStatus() && count > MagicNumConstant.ZERO){ + Thread.sleep(MagicNumConstant.ONE_THOUSAND); sendPushCallback(dockerPushCallbackDTO,url,--count); } }catch (Exception e){ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/HarborApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/HarborApi.java index 2491c6b..12aa226 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/HarborApi.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/HarborApi.java @@ -67,4 +67,21 @@ public interface HarborApi { */ void deleteImageByTag(String imageUrl); + /** + * + * @param userId 用户Id + * @param name 镜像名称 + * @param tag 镜像标签 + * @return + */ + String getFullImageUrl(Long userId,String name,String tag); + + /** + * + * @param userId 用户Id + * @param name 镜像名称 + * @param tag 镜像标签 + * @return + */ + String getImageUrl(Long userId,String name,String tag); } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/impl/HarborApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/impl/HarborApiImpl.java index d3b6824..1beda86 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/impl/HarborApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/harbor/api/impl/HarborApiImpl.java @@ -18,10 +18,12 @@ package org.dubhe.harbor.api.impl; +import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.StringConstant; import org.dubhe.biz.base.constant.SymbolConstant; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.harbor.api.HarborApi; @@ -309,4 +311,15 @@ public class HarborApiImpl implements HarborApi { } } + @Override + public String getFullImageUrl(Long userId, String name, String tag) { + + return imagePullUrl+ 
getImageUrl(userId,name,tag); + } + + @Override + public String getImageUrl(Long userId, String name, String tag) { + return StringConstant.COMMON+ StrUtil.SLASH +userId+ StrUtil.SLASH+name+StrUtil.COLON+tag; + } + } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/LogMonitoringApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/LogMonitoringApi.java index 20d9bd0..91a62ad 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/LogMonitoringApi.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/LogMonitoringApi.java @@ -75,4 +75,31 @@ public interface LogMonitoringApi { */ long searchLogCountByPodName(LogMonitoringBO logMonitoringBo); + + /** + * 日志查询方法 + * + * @param logMonitoringBo 日志查询bo + * @return LogMonitoringVO 日志查询结果类 + */ + LogMonitoringVO searchLog(LogMonitoringBO logMonitoringBo); + + /** + * 添加 TADL 服务日志到 Elasticsearch + * + * @param experimentId Experiment ID + * @param log 日志 + * @return boolean 日志添加是否成功 + */ + boolean addTadlLogsToEs(long experimentId, String log); + + /** + * TADL 服务日志查询方法 + * + * @param from 日志查询起始值,初始值为1,表示从第一条日志记录开始查询 + * @param size 日志查询记录数 + * @param experimentId TADL Experiment ID + * @return LogMonitoringVO 日志查询结果类 + */ + LogMonitoringVO searchTadlLogById(int from, int size, long experimentId); } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/MetricsApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/MetricsApi.java index d16c272..b7cda70 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/MetricsApi.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/MetricsApi.java @@ -17,13 +17,16 @@ package org.dubhe.k8s.api; +import org.dubhe.biz.base.vo.UserAllotVO; import org.dubhe.k8s.domain.dto.PodQueryDTO; +import org.dubhe.k8s.domain.vo.GpuUsageVO; import org.dubhe.k8s.domain.vo.PodRangeMetricsVO; import org.dubhe.k8s.domain.vo.PtContainerMetricsVO; import org.dubhe.k8s.domain.vo.PtNodeMetricsVO; 
import org.dubhe.k8s.domain.vo.PtPodsVO; import java.util.List; +import java.util.Map; /** * @description 监控信息查询接口 @@ -104,4 +107,17 @@ public interface MetricsApi { */ List getContainerMetrics(); + + /** + * 查询某个节点的gpu使用率 + * + * @return Map> 节点gpu使用率集合 + */ + Map> getNodeGpuUsage(); + + + List getNamespaceUsageRate(Integer resourceType, String sumDay); + + Map getResourceUsageByUser(Integer resourceType, String sumDay, String namespaces); + } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/NodeApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/NodeApi.java index d2b531a..8053088 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/NodeApi.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/NodeApi.java @@ -1,12 +1,12 @@ /** * Copyright 2020 Tianshu AI Platform. All Rights Reserved. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,6 +19,7 @@ package org.dubhe.k8s.api; import io.fabric8.kubernetes.api.model.Toleration; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.resource.BizNode; import org.dubhe.k8s.domain.resource.BizTaint; import org.dubhe.k8s.enums.LackOfResourcesEnum; @@ -91,7 +92,7 @@ public interface NodeApi { * @param value 标签value * @return List */ - List getWithLabel(String key,String value); + List getWithLabel(String key, String value); /** * 根据标签查询节点 @@ -115,29 +116,39 @@ public interface NodeApi { * * @param nodeSelector 节点选择标签 * @param taints 该资源所能容忍的污点 - * @param cpuNum 单位为m 1核等于1000m - * @param memNum 单位为Mi 1Mi等于1024Ki - * @param gpuNum 单位为显卡,即"1"表示1张显卡 + * @param baseResourceBo 资源通用属性基类 * @return LackOfResourcesEnum 资源缺乏枚举类 */ - LackOfResourcesEnum isAllocatable(Map nodeSelector, List taints, Integer cpuNum, Integer memNum, Integer gpuNum); + LackOfResourcesEnum isAllocatable(Map nodeSelector, List taints, BaseResourceBo baseResourceBo); /** * 查询集群资源是否充足 * - * @param cpuNum 单位为m 1核等于1000m - * @param memNum 单位为Mi 1Mi等于1024Ki - * @param gpuNum 单位为显卡,即"1"表示1张显卡 + * @param baseResourceBo 资源通用属性基类 * @return LackOfResourcesEnum 资源缺乏枚举类 */ - LackOfResourcesEnum isAllocatable(Integer cpuNum, Integer memNum, Integer gpuNum); + LackOfResourcesEnum isAllocatable(BaseResourceBo baseResourceBo); + + /** + * 查询集群资源是否充足 + * + * @param namespace 命名空间 + * @param cpuNum cpu限制 单位核 0表示不限制 + * @param memNum 内存限制 单位G 0表示不限制 + * @param k8sLabelKey k8s GPU资源标签key值(例如:nvidia.com/gpu) + * @param gpuNum GPU数量,0表示共享显卡,null表示不使用显卡 + * @param gpuModel gpu型号 + * @return LackOfResourcesEnum 资源缺乏枚举类 + */ + LackOfResourcesEnum isAllocatableConvert(String namespace, Integer cpuNum, Integer memNum, Boolean useGpu, String k8sLabelKey, String gpuModel, Integer gpuNum); /** * 判断是否超出总可分配gpu数 * @param gpuNum + * @param gpuModel gpu型号 * @return LackOfResourcesEnum 资源缺乏枚举类 */ - LackOfResourcesEnum isOutOfTotalAllocatableGpu(Integer 
gpuNum); + LackOfResourcesEnum isOutOfTotalAllocatableGpu(String k8sLabelKey, String gpuModel, Integer gpuNum); /** * 添加污点 @@ -189,9 +200,9 @@ public interface NodeApi { /** * 获取当前用户 资源隔离 NodeSelector - * @return Map + * @return Map */ - Map getNodeIsolationNodeSelector(); + Map getNodeIsolationNodeSelector(); /** * 根据userid 生成 BizTaint 列表 @@ -207,4 +218,5 @@ public interface NodeApi { * @return */ List geBizTaintListByUserId(); + } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java index e3f734f..465af9d 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java @@ -88,6 +88,15 @@ public interface PersistentVolumeClaimApi { PtBaseResult delete(String namespace, String pvcName); /** + * 删除PVC + * + * @param namespace 命名空间 + * @param resourceName 资源名称 + * @return PtBaseResult 基础结果类 + */ + PtBaseResult deletePvcByResourceName(String namespace, String resourceName); + + /** * 拼接storageClassName * * @param pvcName PVC 名称 diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PodApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PodApi.java index 86b589a..7d5dc1e 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PodApi.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PodApi.java @@ -18,10 +18,13 @@ package org.dubhe.k8s.api; import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.client.KubernetesClient; import org.dubhe.k8s.domain.bo.LabelBO; import org.dubhe.k8s.domain.resource.BizPod; import org.dubhe.k8s.domain.vo.PtPodsVO; +import org.dubhe.k8s.utils.LabelUtils; +import java.io.File; import java.util.List; import java.util.Map; import java.util.Set; @@ -114,6 +117,13 @@ public interface PodApi { List list(Set 
labelBos); /** + * 根据resourceName查询Pod集合 + * @param resourceName + * @return + */ + List listByResourceName(String resourceName); + + /** * 根据命名空间查询Pod集合 * * @param namespace 命名空间 @@ -150,6 +160,14 @@ public interface PodApi { String getToken(String namespace, String podName); /** + * 根据resourceName 获取pod对应k8s中labels + * + * @param resourceName 资源名称 + * @return Map map + */ + Map getLabels(String resourceName); + + /** * 根据命名空间和资源名获得Token信息 * * @param namespace 命名空间 @@ -167,7 +185,34 @@ public interface PodApi { */ String getUrlByResourceName(String namespace, String resourceName); + /** + * 拷贝文件到pod + * @param namespace 命名空间 + * @param podName pod名称 + * @param containerName 容器名称 + * @param file 文件 + * @param targetDir 目标路径 + */ + void copyToPod(String namespace, String podName, String containerName, File file, String targetDir); + /** + * 同步执行 + * @param namespace 命名空间 + * @param podName pod名称 + * @param containerName 容器名称 + * @param cmd 命令 + */ + void exec(String namespace, String podName, String containerName, String cmd); + /** + * 设置pod间 ssh免密登录 + * @param podList pod 列表 + */ + void sshAuthentication(List podList); + /** + * 设置pod NODE_IPS 环境变量为 pod ip 列表 + * @param podList pod 列表 + */ + void setNodeIpsEnv(List podList); } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ResourceQuotaApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ResourceQuotaApi.java index 278903d..e38f7c5 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ResourceQuotaApi.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ResourceQuotaApi.java @@ -18,11 +18,13 @@ package org.dubhe.k8s.api; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.PtResourceQuotaBO; import org.dubhe.k8s.domain.resource.BizResourceQuota; import org.dubhe.k8s.enums.LimitsOfResourcesEnum; import java.util.List; +import java.util.Map; /** * @description 限制命名空间整体的资源配额 @@ -41,12 
+43,12 @@ public interface ResourceQuotaApi { * 创建 ResourceQuota * @param namespace 命名空间 * @param name ResourceQuota 名称 - * @param cpu cpu限制 单位核 0表示不限制 - * @param memory 内存限制 单位G 0表示不限制 - * @param gpu gpu限制 单位张 0表示不限制 + * @param cpu cpu限制 单位核 + * @param memory 内存限制 单位G + * @param gpuLimit gpu限制 单位张 * @return */ - BizResourceQuota create(String namespace,String name,Integer cpu,Integer memory,Integer gpu); + BizResourceQuota create(String namespace, String name, Integer cpu, Integer memory, Map gpuLimit); /** * 根据命名空间查询ResourceQuota集合 @@ -68,10 +70,20 @@ public interface ResourceQuotaApi { /** * 判断资源是否达到限制 * - * @param cpuNum 单位为m 1核等于1000m - * @param memNum 单位为Mi 1Mi等于1024Ki - * @param gpuNum 单位为显卡,即"1"表示1张显卡 + * @param baseResourceBo 资源通用属性基类 * @return LimitsOfResourcesEnum 资源超限枚举类 */ - LimitsOfResourcesEnum reachLimitsOfResources(String namespace,Integer cpuNum, Integer memNum, Integer gpuNum); + LimitsOfResourcesEnum reachLimitsOfResources(BaseResourceBo baseResourceBo); + + /** + * 判断资源是否达到限制 + * + * @param namespace 命名空间 + * @param cpuNum cpu限制 单位核 0表示不限制 + * @param memNum 内存限制 单位G 0表示不限制 + * @param gpuNum gpu限制 + * @param k8sLabelKey k8s GPU资源标签key值(例如:nvidia.com/gpu) + * @return LimitsOfResourcesEnum 资源超限枚举类 + */ + LimitsOfResourcesEnum reachLimitsOfResourcesConvert(String namespace, Integer cpuNum, Integer memNum, Integer gpuNum, String k8sLabelKey); } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ServiceApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ServiceApi.java new file mode 100644 index 0000000..6deb4b2 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/ServiceApi.java @@ -0,0 +1,29 @@ +package org.dubhe.k8s.api; + +import org.dubhe.k8s.domain.resource.BizService; + +import java.util.List; +import java.util.Map; + +/** + * @description k8s Service接口 + * @date 2021-10-27 + */ +public interface ServiceApi { + + /** + * 查询命名空间下所有service + * + * @param namespace 命名空间 + * @return List 
Service业务类集合 + */ + List getWithNameSpace(String namespace); + + /** + * 根据resourceName 获取service对应k8s中labels + * + * @param resourceName 资源名称 + * @return Map map + */ + Map getLabels(String resourceName); +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java index 0a2c506..c2f5a5f 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java @@ -46,8 +46,10 @@ import org.dubhe.k8s.api.ResourceIisolationApi; import org.dubhe.k8s.api.ResourceQuotaApi; import org.dubhe.k8s.api.VolumeApi; import org.dubhe.k8s.cache.ResourceCache; +import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.BuildFsVolumeBO; import org.dubhe.k8s.domain.bo.DistributeTrainBO; import org.dubhe.k8s.domain.bo.TaskYamlBO; @@ -58,17 +60,15 @@ import org.dubhe.k8s.domain.cr.DistributeTrainSpec; import org.dubhe.k8s.domain.entity.K8sTask; import org.dubhe.k8s.domain.resource.BizDistributeTrain; import org.dubhe.k8s.domain.vo.VolumeVO; -import org.dubhe.k8s.enums.ImagePullPolicyEnum; -import org.dubhe.k8s.enums.K8sKindEnum; -import org.dubhe.k8s.enums.K8sResponseEnum; -import org.dubhe.k8s.enums.LackOfResourcesEnum; -import org.dubhe.k8s.enums.LimitsOfResourcesEnum; +import org.dubhe.k8s.enums.*; +import org.dubhe.k8s.service.K8sGpuConfigService; import org.dubhe.k8s.service.K8sTaskService; import org.dubhe.k8s.utils.BizConvertUtils; +import org.dubhe.k8s.utils.K8sCommonUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.LabelUtils; +import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; -import 
org.springframework.beans.factory.annotation.Value; import java.io.ByteArrayInputStream; import java.sql.Timestamp; @@ -84,7 +84,6 @@ import static org.dubhe.biz.base.constant.MagicNumConstant.SIXTY_LONG; import static org.dubhe.biz.base.constant.MagicNumConstant.THOUSAND_LONG; import static org.dubhe.biz.base.constant.MagicNumConstant.ZERO; import static org.dubhe.biz.base.constant.SymbolConstant.BLANK; -import static org.dubhe.k8s.constant.K8sParamConstants.GPU_RESOURCE_KEY; import static org.dubhe.k8s.constant.K8sParamConstants.NODE_READY_TRUE; import static org.dubhe.k8s.constant.K8sParamConstants.PYTHONUNBUFFERED; import static org.dubhe.k8s.constant.K8sParamConstants.QUANTITY_CPU_KEY; @@ -130,6 +129,12 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { @Autowired private ResourceIisolationApi resourceIisolationApi; + @Autowired + private K8sCommonUtils k8sCommonUtils; + + @Autowired + private K8sGpuConfigService k8sGpuConfigService; + private KubernetesClient client; private MixedOperation> dtClient; @@ -147,12 +152,22 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { @Override public BizDistributeTrain create(DistributeTrainBO bo) { LogUtil.info(LogEnum.BIZ_K8S, "Params of creating DistributeTrain--create:{}", bo); - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum() * bo.getSize(), bo.getMemNum() * bo.getSize(), bo.getGpuNum() == null?0:bo.getGpuNum() * bo.getSize()); + + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, baseResourceBo); + baseResourceBo.setCpuNum(bo.getCpuNum() * bo.getSize()); + baseResourceBo.setMemNum(bo.getMemNum() * bo.getSize()); + baseResourceBo.setGpuNum(bo.getGpuNum() == null ? 
0 : bo.getGpuNum() * bo.getSize()); + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { return new BizDistributeTrain().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } if (bo.getGpuNum() != null && bo.getGpuNum() > 0) { - LackOfResourcesEnum lack = nodeApi.isOutOfTotalAllocatableGpu(bo.getGpuNum() * bo.getSize()); + Integer k8sGpuNumLimit = k8sGpuConfigService.getGpuLimit(bo.getNamespace(), bo.getGpuModel(), bo.getK8sLabelKey()); + if(bo.getGpuNum() > k8sGpuNumLimit){ + return new BizDistributeTrain().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), LimitsOfResourcesEnum.LIMITS_OF_GPU.getMessage()); + } + LackOfResourcesEnum lack = nodeApi.isOutOfTotalAllocatableGpu(baseResourceBo.getK8sLabelKey(), baseResourceBo.getGpuModel(), bo.getGpuNum() * bo.getSize()); if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) { return new BizDistributeTrain().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } @@ -179,6 +194,8 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { private Integer memNum; private Integer cpuNum; private Integer gpuNum; + private String k8sLabelKey; + private String gpuModel; private String slaveCmd; private Map env; private Map baseLabels; @@ -199,6 +216,9 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { this.memNum = bo.getMemNum(); this.cpuNum = bo.getCpuNum(); this.gpuNum = bo.getGpuNum(); + this.k8sLabelKey = bo.getK8sLabelKey(); + this.gpuModel = bo.getGpuModel(); + this.slaveCmd = bo.getSlaveCmd(); this.env = bo.getEnv(); this.businessLabel = bo.getBusinessLabel(); @@ -260,16 +280,18 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { masterResources.setLimits(new HashMap() {{ Optional.ofNullable(memNum).ifPresent(v -> put(QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); 
Optional.ofNullable(cpuNum).ifPresent(v -> put(QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); - Optional.ofNullable(gpuNum).ifPresent(v -> put(GPU_RESOURCE_KEY, new Quantity(v.toString()))); + Optional.ofNullable(gpuNum).ifPresent(v -> put(k8sLabelKey, new Quantity(v.toString()))); }}); + k8sCommonUtils.addRdmaResource(masterResources.getLimits()); distributeTrainSpec.setMasterResources(masterResources); //slave节点申请资源 ResourceRequirements slaveResources = new ResourceRequirements(); slaveResources.setLimits(new HashMap() {{ Optional.ofNullable(memNum).ifPresent(v -> put(QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); Optional.ofNullable(cpuNum).ifPresent(v -> put(QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); - Optional.ofNullable(gpuNum).ifPresent(v -> put(GPU_RESOURCE_KEY, new Quantity(v.toString()))); + Optional.ofNullable(gpuNum).ifPresent(v -> put(k8sLabelKey, new Quantity(v.toString()))); }}); + k8sCommonUtils.addRdmaResource(slaveResources.getLimits()); distributeTrainSpec.setSlaveResources(slaveResources); //配置环境变量 List envVarList = new ArrayList() {{ @@ -283,6 +305,10 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { if (gpuNum != null && gpuNum != 0) { envVarList.add(new EnvVarBuilder().withName(GPU_NUM_PER_NODE).withValue(String.valueOf(gpuNum)).build()); + Map gpuLabel = new HashMap(2); + gpuLabel.put(K8sLabelConstants.NODE_GPU_LABEL_KEY, K8sLabelConstants.NODE_GPU_LABEL_VALUE); + gpuLabel.put(K8sLabelConstants.NODE_GPU_MODEL_LABEL_KEY, gpuModel); + distributeTrainSpec.setNodeSelector(gpuLabel); } if (CollectionUtils.isNotEmpty(env)) { Set envNames = env.keySet(); @@ -384,7 +410,7 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { */ @Override public PtBaseResult deleteByResourceName(String namespace, String resourceName) { - LogUtil.info(LogEnum.BIZ_K8S, "deleteByResourceName namespace {} resourceName {}", 
namespace,resourceName); + LogUtil.info(LogEnum.BIZ_K8S, "deleteByResourceName namespace {} resourceName {}", namespace, resourceName); if (dtClient == null) { LogUtil.error(LogEnum.BIZ_K8S, "dtClient初始化失败"); } @@ -392,7 +418,7 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { return new BizDistributeTrain().baseErrorBadRequest(); } try { - k8sTaskService.deleteByNamespaceAndResourceName(namespace,resourceName); + k8sTaskService.deleteByNamespaceAndResourceName(namespace, resourceName); //根据条件获得对应的分布式训练资源集合 DistributeTrainList list = dtClient.inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list(); List items = list.getItems(); @@ -486,7 +512,7 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { @Override public boolean delete(String crYaml) { try { - LogUtil.info(LogEnum.BIZ_K8S, "delete crYaml {}",crYaml); + LogUtil.info(LogEnum.BIZ_K8S, "delete crYaml {}", crYaml); return dtClient.load(new ByteArrayInputStream(crYaml.getBytes())).delete(); } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "Delete DistributeTrain error:{} ,yml:{}", e.getMessage(), crYaml); diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java index 1ff656f..a6fcff0 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java @@ -47,6 +47,7 @@ import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.PtModelOptimizationDeploymentBO; import org.dubhe.k8s.domain.resource.BizDeployment; import org.dubhe.k8s.enums.ImagePullPolicyEnum; @@ 
-57,9 +58,11 @@ import org.dubhe.k8s.enums.LimitsOfResourcesEnum; import org.dubhe.k8s.enums.RestartPolicyEnum; import org.dubhe.k8s.enums.ShellCommandEnum; import org.dubhe.k8s.utils.BizConvertUtils; +import org.dubhe.k8s.utils.K8sCommonUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.LabelUtils; import org.dubhe.k8s.utils.YamlUtils; +import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import javax.annotation.Resource; @@ -76,6 +79,7 @@ import java.util.stream.Collectors; */ public class DubheDeploymentApiImpl implements DubheDeploymentApi { private K8sUtils k8sUtils; + private KubernetesClient client; @Autowired @@ -89,6 +93,9 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { @Autowired private ResourceIisolationApi resourceIisolationApi; + @Autowired + private K8sCommonUtils k8sCommonUtils; + private static final String DATASET = "/dataset"; private static final String WORKSPACE = "/workspace"; private static final String OUTPUT = "/output"; @@ -112,11 +119,15 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { public BizDeployment create(PtModelOptimizationDeploymentBO bo) { try { LogUtil.info(LogEnum.BIZ_K8S, "Param of create:{}", bo); - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, baseResourceBo); + + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { return new BizDeployment().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } - LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + LackOfResourcesEnum lack = nodeApi.isAllocatable(baseResourceBo); if 
(!LackOfResourcesEnum.ADEQUATE.equals(lack)) { return new BizDeployment().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } @@ -192,7 +203,7 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { */ @Override public PtBaseResult deleteByResourceName(String namespace, String resourceName) { - LogUtil.info(LogEnum.BIZ_K8S, "Param of deleteByResourceName:namespace {} resourceName {}", namespace,resourceName); + LogUtil.info(LogEnum.BIZ_K8S, "Param of deleteByResourceName:namespace {} resourceName {}", namespace, resourceName); if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(resourceName)) { return new PtBaseResult().baseErrorBadRequest(); } @@ -247,8 +258,9 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { this.resourcesLimitsMap = Maps.newHashMap(); Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); - Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); + Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(bo.getK8sLabelKey(), new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); + k8sCommonUtils.addRdmaResource(resourcesLimitsMap); this.businessLabel = bo.getBusinessLabel(); this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); this.baseLabels = LabelUtils.getBaseLabels(baseName, businessLabel); @@ -413,8 +425,8 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { .withName(PVC_DATASET) .withNewHostPath() .withPath(datasetDir) - .withType(K8sParamConstants.HOST_PATH_TYPE) - .endHostPath() + .withType(K8sParamConstants.HOST_PATH_TYPE) + .endHostPath() .build()); } if (StrUtil.isNotBlank(workspaceDir)) { diff --git 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java index 2d82e0a..ced5b10 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java @@ -66,7 +66,9 @@ import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.PtJupyterResourceBO; +import org.dubhe.k8s.domain.bo.PtMountDirBO; import org.dubhe.k8s.domain.bo.PtPersistentVolumeClaimBO; import org.dubhe.k8s.domain.bo.TaskYamlBO; import org.dubhe.k8s.domain.entity.K8sTask; @@ -77,10 +79,13 @@ import org.dubhe.k8s.enums.K8sKindEnum; import org.dubhe.k8s.enums.K8sResponseEnum; import org.dubhe.k8s.enums.LackOfResourcesEnum; import org.dubhe.k8s.enums.LimitsOfResourcesEnum; +import org.dubhe.k8s.service.K8sGpuConfigService; import org.dubhe.k8s.service.K8sTaskService; +import org.dubhe.k8s.utils.K8sCommonUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.LabelUtils; import org.dubhe.k8s.utils.YamlUtils; +import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import javax.annotation.Resource; @@ -121,16 +126,18 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { private ResourceQuotaApi resourceQuotaApi; @Autowired private ResourceIisolationApi resourceIisolationApi; + @Autowired + private K8sGpuConfigService k8sGpuConfigService; + @Autowired + private K8sCommonUtils k8sCommonUtils; private static final String DATASET = "/dataset"; private static final String WORKSPACE = "/workspace"; private static final String DSHM_PATH = "/dev/shm"; - private static final String 
K8S_PIP_SITE_PACKAGE = "/home/admin/.local/lib/python3.8/site-packages"; private static final String PVC_DATASET = "pvc-dataset"; private static final String PVC_WORKSPACE = "pvc-workspace"; - private static final String PVC_PIP_SITE_PACKAGE = "pvc-pip-site-package"; private static final String CONTAINER_NAME = "web"; private static final Integer CONTAINER_PORT = 8888; @@ -158,19 +165,27 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { return new PtJupyterDeployVO().error(K8sResponseEnum.BAD_REQUEST.getCode(), K8sResponseEnum.BAD_REQUEST.getMessage()); } - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, baseResourceBo); + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } - LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + if (bo.getUseGpu()) { + Integer k8sGpuNumLimit = k8sGpuConfigService.getGpuLimit(bo.getNamespace(), bo.getGpuModel(), bo.getK8sLabelKey()); + if(bo.getGpuNum() > k8sGpuNumLimit){ + return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), LimitsOfResourcesEnum.LIMITS_OF_GPU.getMessage()); + } + } + LackOfResourcesEnum lack = nodeApi.isAllocatable(baseResourceBo); if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) { return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } + resourceCache.deletePodCacheByResourceName(bo.getNamespace(), bo.getName()); - if (!fileStoreApi.createDirs(bo.getWorkspaceDir(), bo.getDatasetDir(),bo.getPipSitePackageDir())) { - return new 
PtJupyterDeployVO().error(K8sResponseEnum.INTERNAL_SERVER_ERROR.getCode(), K8sResponseEnum.INTERNAL_SERVER_ERROR.getMessage()); + if(!fileStoreApi.fileOrDirIsExist(bo.getWorkspaceDir())){ + fileStoreApi.createDir(bo.getWorkspaceDir()); } - resourceCache.deletePodCacheByResourceName(bo.getNamespace(), bo.getName()); PtJupyterDeployVO result = new JupyterDeployer(bo).buildFsVolumes().deploy(); LogUtil.info(LogEnum.BIZ_K8S, "Return value of creating Notebook create:{}", result); return result; @@ -189,11 +204,20 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { @Override public PtJupyterDeployVO createWithPvc(PtJupyterResourceBO bo) { try { - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, baseResourceBo); + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } - LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + if (bo.getUseGpu()) { + Integer k8sGpuNumLimit = k8sGpuConfigService.getGpuLimit(bo.getNamespace(), bo.getGpuModel(), bo.getK8sLabelKey()); + if(bo.getGpuNum() > k8sGpuNumLimit){ + return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), LimitsOfResourcesEnum.LIMITS_OF_GPU.getMessage()); + } + } + LackOfResourcesEnum lack = nodeApi.isAllocatable(baseResourceBo); if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) { return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } @@ -230,12 +254,14 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { */ @Override public PtBaseResult delete(String 
namespace, String resourceName) { - LogUtil.info(LogEnum.BIZ_K8S, "Param of delete namespace {} resourceName {}", namespace,resourceName); + LogUtil.info(LogEnum.BIZ_K8S, "Param of delete namespace {} resourceName {}", namespace, resourceName); try { Boolean res = client.extensions().ingresses().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete() && client.services().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete() && client.apps().statefulSets().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete() && client.secrets().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete(); + persistentVolumeClaimApi.deletePvcByResourceName(namespace,resourceName); + persistentVolumeClaimApi.deletePvByResourceName(resourceName); k8sTaskService.deleteByNamespaceAndResourceName(namespace,resourceName); if (res) { return new PtBaseResult(); @@ -305,12 +331,11 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { private String image; private String datasetDir; private String datasetMountPath; - private String pipSitePackageDir; - private String pipSitePackageMountPath; private String workspaceMountPath; private String workspaceDir; private Boolean useGpu; private Quantity shmMemory; + private String gpuModel; //数据集默认只读 private boolean datasetReadOnly; @@ -339,8 +364,6 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { this.image = bo.getImage(); this.datasetDir = bo.getDatasetDir(); this.datasetMountPath = StringUtils.isEmpty(bo.getDatasetMountPath()) ? DATASET : bo.getDatasetMountPath(); - this.pipSitePackageDir=bo.getPipSitePackageDir(); - this.pipSitePackageMountPath=StringUtils.isEmpty(bo.getPipSitePackageMountPath()) ? K8S_PIP_SITE_PACKAGE : bo.getPipSitePackageMountPath(); this.workspaceDir = bo.getWorkspaceDir(); this.workspaceMountPath = StringUtils.isEmpty(bo.getWorkspaceMountPath()) ? 
WORKSPACE : bo.getWorkspaceMountPath(); Optional.ofNullable(bo.getDatasetReadOnly()).ifPresent(v -> datasetReadOnly = v); @@ -349,14 +372,16 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { if (bo.getUseGpu() != null && bo.getUseGpu() && null == bo.getGpuNum()) { bo.setGpuNum(0); } + this.gpuModel = bo.getGpuModel(); this.resourcesLimitsMap = Maps.newHashMap(); Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); - Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); + Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(bo.getK8sLabelKey(), new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); - this.shmMemory = new Quantity("1024",K8sParamConstants.MEM_UNIT); + k8sCommonUtils.addRdmaResource(resourcesLimitsMap); + this.shmMemory = new Quantity("1024", K8sParamConstants.MEM_UNIT); // 共享内存设置为容器内存的一半(参考 Linux 的默认设置) - Optional.ofNullable(bo.getMemNum()).ifPresent(v -> shmMemory.setAmount(String.valueOf(v/2))); + Optional.ofNullable(bo.getMemNum()).ifPresent(v -> shmMemory.setAmount(String.valueOf(v / 2))); this.host = k8sUtils.getHost(); this.businessLabel = bo.getBusinessLabel(); this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); @@ -439,7 +464,7 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { .addToData(ImmutableMap.of(K8sParamConstants.SECRET_PWD_KEY, base64Pwd, K8sParamConstants.SECRET_URL_KEY, base64BaseUrl)) .build(); - LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}", secretName); + LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}, yaml info is : {}", secretName, YamlUtils.dumpAsYaml(secret)); secret = client.secrets().create(secret); 
LogUtil.info(LogEnum.BIZ_K8S, "{} deployed successfully", secretName); } @@ -459,13 +484,11 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { .withReadOnly(datasetReadOnly) .build()); + BizPersistentVolumeClaim bizPersistentVolumeClaim = persistentVolumeClaimApi.createWithFsPv(new PtPersistentVolumeClaimBO(namespace,baseName,new PtMountDirBO(datasetDir,MagicNumConstant.ONE_HUNDRED+K8sParamConstants.MEM_UNIT_GI,true))); volumes.add(new VolumeBuilder() - .withName(PVC_DATASET) - .withNewHostPath() - .withPath(datasetDir) - .withType(K8sParamConstants.HOST_PATH_TYPE) - .endHostPath() - .build()); + .withName(PVC_DATASET) + .withNewPersistentVolumeClaim(bizPersistentVolumeClaim.getName(), true) + .build()); } } @@ -484,25 +507,6 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { .build()); } - /** - * 挂载pip包路径 - */ - private void buildPipSitePackageFsVolume(){ - if (StrUtil.isNotBlank(pipSitePackageDir)) { - volumeMounts.add(new VolumeMountBuilder() - .withName(PVC_PIP_SITE_PACKAGE) - .withMountPath(pipSitePackageMountPath) - .build()); - - volumes.add(new VolumeBuilder() - .withName(PVC_PIP_SITE_PACKAGE) - .withNewHostPath() - .withPath(pipSitePackageDir) - .withType(K8sParamConstants.HOST_PATH_TYPE) - .endHostPath() - .build()); - } - } /** * 构建VolumeMount @@ -517,8 +521,8 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { volumes.add(new VolumeBuilder() .withName(PVC_WORKSPACE) .withNewHostPath() - .withPath(workspaceDir) - .withType(K8sParamConstants.HOST_PATH_TYPE) + .withPath(workspaceDir) + .withType(K8sParamConstants.HOST_PATH_TYPE) .endHostPath() .build()); } @@ -548,7 +552,6 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { * @return JupyterDeployer Notebook 部署类 */ private JupyterDeployer buildFsVolumes() { - buildPipSitePackageFsVolume(); buildDatasetFsVolume(); buildWorkspaceFsVolume(); buildShmFsVolume(); @@ -561,7 +564,6 @@ public class JupyterResourceApiImpl implements 
JupyterResourceApi { * @return JupyterDeployer Notebook 部署类 */ private JupyterDeployer buildFsPvcVolumes() { - buildPipSitePackageFsVolume(); buildDatasetFsVolume(); buildWorkspaceFsPvcVolume(); buildShmFsVolume(); @@ -588,13 +590,19 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { Container container = new Container(); container.setName(statefulSetName); container.setImage(image); - container.setImagePullPolicy(ImagePullPolicyEnum.IFNOTPRESENT.getPolicy()); + container.setImagePullPolicy(ImagePullPolicyEnum.ALWAYS.getPolicy()); //端口映射 container.setPorts(Arrays.asList(new ContainerPortBuilder() .withContainerPort(CONTAINER_PORT) .withName(CONTAINER_NAME).build())); container.setVolumeMounts(volumeMounts); + List cmd = new ArrayList<>(); + cmd.add("/bin/bash"); + cmd.add("-c"); + cmd.add("mv /root/README.md /workspace/ ; /root/miniconda3/bin/python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && /root/miniconda3/bin/jupyter-lab --ip=0.0.0.0 --port=8888 --allow-root --no-browser "); + container.setCommand(cmd); + //环境变量 List env = new ArrayList(); env.add(new EnvVarBuilder().withName(K8sParamConstants.ENV_PWD_KEY) @@ -617,7 +625,7 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { .build()); Map gpuLabel = new HashMap<>(2); if (useGpu) { - gpuLabel.put(K8sLabelConstants.NODE_GPU_LABEL_KEY, K8sLabelConstants.NODE_GPU_LABEL_VALUE); + gpuLabel.put(K8sLabelConstants.NODE_GPU_MODEL_LABEL_KEY, gpuModel); } statefulSet = new StatefulSetBuilder() diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/LogMonitoringApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/LogMonitoringApiImpl.java index a61b2d8..e357b4f 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/LogMonitoringApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/LogMonitoringApiImpl.java @@ -18,12 +18,14 @@ package org.dubhe.k8s.api.impl; import 
com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.core.toolkit.CollectionUtils; import io.fabric8.kubernetes.api.model.DoneablePod; import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.dsl.PodResource; import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.enums.BizEnum; import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.biz.base.utils.TimeTransferUtil; import org.dubhe.biz.log.enums.LogEnum; @@ -39,6 +41,10 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.client.indices.CreateIndexRequest; +import org.elasticsearch.client.indices.CreateIndexResponse; +import org.elasticsearch.client.indices.GetIndexRequest; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.QueryBuilders; @@ -48,12 +54,9 @@ import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.sort.SortOrder; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; - -import java.io.IOException; import java.text.SimpleDateFormat; import java.util.*; import java.util.stream.Collectors; - import static org.dubhe.biz.base.constant.MagicNumConstant.ZERO; import static org.dubhe.biz.base.constant.MagicNumConstant.*; import static org.dubhe.biz.base.constant.SymbolConstant.*; @@ -78,6 +81,14 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { private KubernetesClient kubernetesClient; private static final String INDEX_NAME = "kubelogs"; + private static final String TADL_INDEX_NAME = "tadllogs"; + private static 
final String INDEX_SHARDS_NUMBER = "index.number_of_shards"; + private static final String INDEX_REPLICAS_NUMBER = "index.number_of_replicas"; + private static final String TYPE = "type"; + private static final String TEXT = "text"; + private static final String DATE = "date"; + private static final String PROPERTIES = "properties"; + private static final String EXPERIMENT_ID = "experimentId"; private static final String POD_NAME_KEY = "kubernetes.pod_name.keyword"; private static final String POD_NAME = "kubernetes.pod_name"; private static final String NAMESPACE_KEY = "kubernetes.namespace_name.keyword"; @@ -86,6 +97,12 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { private static final String MESSAGE = "log"; private static final String LOG_PREFIX = "[Dubhe Service Log] "; private static final String INDEX_FORMAT = "yyyy.MM.dd"; + private static final String TIMESTAMP_FORMAT = "yyyy-MM-dd HH:mm:ss.sss"; + private static final String LOG_FORMAT = "[Dubhe Service Log]-[%s]-%s"; + private static final String KUBERNETES_KEY = "kubernetes"; + private static final String KUBERNETES_POD_NAME_KEY = "pod_name"; + private static final String BUSINESS_KEY = "kubernetes.labels.platform/business.keyword"; + private static final String BUSINESS_GROUP_ID_KEY = "kubernetes.labels.platform/business-group-id.keyword"; public LogMonitoringApiImpl(K8sUtils k8sUtils) { this.kubernetesClient = k8sUtils.getClient(); @@ -106,7 +123,7 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { LogUtil.error(LogEnum.BIZ_K8S, "LogMonitoringApiImpl.addLogsToEs error: param [podName] and [namespace] are required"); return false; } - List logList = searchLogInfoByEs(ZERO, ONE, new LogMonitoringBO(namespace,podName)); + List logList = searchLogInfoByEs(ZERO, MagicNumConstant.ONE, new LogMonitoringBO(namespace,podName)); if (CollectionUtils.isNotEmpty(logList)) { return true; } @@ -151,7 +168,7 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { 
/**通过restHighLevelClient发送http的请求批量创建文档**/ restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); - } catch (IOException e) { + } catch (Exception e) { LogUtil.error(LogEnum.BIZ_K8S, "LogMonitoringApi.addLogsToEs error:{}", e); return false; } @@ -169,7 +186,7 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { @Override public LogMonitoringVO searchLogByResName(int from, int size, LogMonitoringBO logMonitoringBo) { List logList = new ArrayList<>(); - LogMonitoringVO logMonitoringResult = new LogMonitoringVO(ZERO_LONG, logList); + LogMonitoringVO logMonitoringResult = new LogMonitoringVO(ZERO, logList); String namespace = logMonitoringBo.getNamespace(); String resourceName = logMonitoringBo.getResourceName(); if (StringUtils.isBlank(resourceName) || StringUtils.isBlank(namespace)) { @@ -195,7 +212,7 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { } logMonitoringResult.setLogs(logList); - logMonitoringResult.setTotalLogs(Long.valueOf(logList.size())); + logMonitoringResult.setTotalLogs(logList.size()); return logMonitoringResult; } @@ -212,7 +229,23 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { LogMonitoringVO logMonitoringResult = new LogMonitoringVO(); List logs = searchLogInfoByEs(from, size, logMonitoringBo); logMonitoringResult.setLogs(logs); - logMonitoringResult.setTotalLogs(Long.valueOf(logs.size())); + logMonitoringResult.setTotalLogs(logs.size()); + return logMonitoringResult; + + } + + /** + * 日志查询方法 + * + * @param logMonitoringBo 日志查询bo + * @return LogMonitoringVO 日志查询结果类 + */ + @Override + public LogMonitoringVO searchLog(LogMonitoringBO logMonitoringBo) { + LogMonitoringVO logMonitoringResult = new LogMonitoringVO(); + List logs = searchLogInfoByEs(logMonitoringBo); + logMonitoringResult.setLogs(logs); + logMonitoringResult.setTotalLogs(logs.size()); return logMonitoringResult; } @@ -236,6 +269,127 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { } /** + * 添加 TADL 服务日志到 
Elasticsearch + * + * @param experimentId 日志查询起始值,初始值为1,表示从第一条日志记录开始查询 + * @param log 日志 + * @return boolean 日志添加是否成功 + */ + @Override + public boolean addTadlLogsToEs(long experimentId, String log) { + + + Date date = new Date(); + String timestamp = TimeTransferUtil.dateTransferToUtc(date); + BulkRequest bulkRequest = new BulkRequest(); + try { + /**查询索引是否存在, 不存在则创建**/ + GetIndexRequest getIndexRequest = new GetIndexRequest(TADL_INDEX_NAME); + boolean exists = restHighLevelClient.indices().exists(getIndexRequest, RequestOptions.DEFAULT); + if (!exists){ + CreateIndexRequest createIndexRequest = new CreateIndexRequest(TADL_INDEX_NAME); + createIndexRequest.settings(Settings.builder() + .put(INDEX_SHARDS_NUMBER, 3) + .put(INDEX_REPLICAS_NUMBER, 2) + ); + Map timestampMapping = new HashMap<>(); + timestampMapping.put(TYPE, DATE); + Map logMapping = new HashMap<>(); + logMapping.put(TYPE, TEXT); + Map experimentIdMapping = new HashMap<>(); + experimentIdMapping.put(TYPE, TEXT); + Map properties = new HashMap<>(); + properties.put(TIMESTAMP,timestampMapping); + properties.put(EXPERIMENT_ID,experimentIdMapping); + properties.put(MESSAGE,logMapping); + Map mapping = new HashMap<>(); + mapping.put(PROPERTIES, properties); + createIndexRequest.mapping(mapping); + CreateIndexResponse createIndexResponse = restHighLevelClient.indices().create(createIndexRequest, RequestOptions.DEFAULT); + + } + LinkedHashMap jsonMap = new LinkedHashMap() {{ + put(EXPERIMENT_ID, experimentId); + put(MESSAGE, new SimpleDateFormat(TIMESTAMP_FORMAT).format(date) + SPACE + log); + put(TIMESTAMP, timestamp); + }}; + + /**添加索引创建对象到bulkRequest**/ + bulkRequest.add(new IndexRequest(TADL_INDEX_NAME).source(jsonMap)); + + /**通过restHighLevelClient发送http的请求创建文档**/ + restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "LogMonitoringApi.addTadlLogsToEs error:{}", e); + return false; + } + return true; + } + + + + /** + * TADL 服务日志查询方法 + 
* + * @param from 日志查询起始值,初始值为1,表示从第一条日志记录开始查询 + * @param size 日志查询记录数 + * @param experimentId TADL Experiment ID + * @return LogMonitoringVO 日志查询结果类 + */ + @Override + public LogMonitoringVO searchTadlLogById(int from, int size, long experimentId) { + List logList = new ArrayList<>(); + LogMonitoringVO logMonitoringResult = new LogMonitoringVO(ZERO, logList); + /**处理查询范围参数起始值**/ + from = from <= MagicNumConstant.ZERO ? MagicNumConstant.ZERO : --from; + size = size <= MagicNumConstant.ZERO || size > TEN_THOUSAND ? TEN_THOUSAND : size; + /**创建搜索请求对象**/ + SearchRequest searchRequest = new SearchRequest(); + searchRequest.indices(TADL_INDEX_NAME); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.trackTotalHits(true).from(from).size(size); + /**根据时间戳排序**/ + searchSourceBuilder.sort(TIMESTAMP, SortOrder.ASC); + /**创建布尔查询对象**/ + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + + boolQueryBuilder.filter(QueryBuilders.matchQuery(EXPERIMENT_ID, experimentId)); + + /**设置boolQueryBuilder到searchSourceBuilder**/ + searchSourceBuilder.query(boolQueryBuilder); + + searchRequest = searchRequest.source(searchSourceBuilder); + /**执行搜索**/ + SearchResponse searchResponse; + try { + searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "LogMonitoringApiImpl.searchTadlLogById error,param:[experimentId]={}, error:{}", experimentId, e); + return logMonitoringResult; + } + /**获取响应结果**/ + SearchHits hits = searchResponse.getHits(); + + SearchHit[] searchHits = hits.getHits(); + if (searchHits.length == MagicNumConstant.ZERO) { + return logMonitoringResult; + } + + for (SearchHit hit : searchHits) { + /**源文档**/ + Map sourceAsMap = hit.getSourceAsMap(); + /**取出message**/ + String message = (String) sourceAsMap.get(MESSAGE); + message = message.replace(LINEBREAK, BLANK); + /**添加日志信息到集合**/ + logList.add(message); + } + 
logMonitoringResult.setLogs(logList); + logMonitoringResult.setTotalLogs(logList.size()); + return logMonitoringResult; + } + + /** * 得到日志信息String * * @param podName Pod名称 @@ -256,6 +410,51 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { return null; } + /** + * 从Elasticsearch查询日志 + * + * @param logMonitoringBo 日志查询bo + * @return List 日志集合 + */ + private List searchLogInfoByEs(LogMonitoringBO logMonitoringBo) { + + List logList = new ArrayList<>(); + + SearchRequest searchRequest = buildSearchRequest(logMonitoringBo); + /**执行搜索**/ + SearchResponse searchResponse; + try { + searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "LogMonitoringApiImpl.searchLogInfoByEs error,param:[logMonitoringBo]={}, error:{}", JSON.toJSONString(logMonitoringBo), e); + return logList; + } + /**获取响应结果**/ + SearchHits hits = searchResponse.getHits(); + + SearchHit[] searchHits = hits.getHits(); + if (searchHits.length == MagicNumConstant.ZERO) { + return logList; + } + + for (SearchHit hit : searchHits) { + + String esResult = hit.getSourceAsString(); + JSONObject jsonObject = JSON.parseObject(esResult); + String message = jsonObject.getString(MESSAGE); + message = message.replace(LINEBREAK, BLANK); + + String podName = jsonObject.getJSONObject(KUBERNETES_KEY). 
+ getString(KUBERNETES_POD_NAME_KEY); + + /**拼接日志信息**/ + String logString = String.format(LOG_FORMAT, podName, message); + /**添加日志信息到集合**/ + logList.add(logString); + } + return logList; + } + /** * 从Elasticsearch查询日志 @@ -366,4 +565,74 @@ public class LogMonitoringApiImpl implements LogMonitoringApi { return searchRequest.source(searchSourceBuilder); } + /** + * 构建搜索请求对象 + * + * @param logMonitoringBo 日志查询bo + * @return SearchRequest ES搜索请求对象 + */ + private SearchRequest buildSearchRequest(LogMonitoringBO logMonitoringBo) { + + /**创建搜索请求对象**/ + SearchRequest searchRequest = new SearchRequest(); + searchRequest.indices(INDEX_NAME); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.trackTotalHits(true).from(logMonitoringBo.getFrom()).size(logMonitoringBo.getSize()); + + /**根据时间戳排序**/ + searchSourceBuilder.sort(TIMESTAMP, SortOrder.ASC); + /**过虑源字段**/ + String[] sourceFieldArray = sourceField.split(COMMA); + + searchSourceBuilder.fetchSource(sourceFieldArray, new String[]{}); + + /**创建布尔查询对象**/ + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + + /**添加podName查询条件**/ + Set podNames = logMonitoringBo.getPodNames(); + if (CollectionUtils.isNotEmpty(podNames)) { + boolQueryBuilder.filter(QueryBuilders.termsQuery(POD_NAME_KEY, podNames.toArray(new String[podNames.size()]))); + } + /**添加namespace查询条件**/ + String namespace = logMonitoringBo.getNamespace(); + if (StringUtils.isNotEmpty(namespace)) { + boolQueryBuilder.filter(QueryBuilders.matchQuery(NAMESPACE_KEY, namespace)); + } + /**添加业务查询条件**/ + BizEnum business = logMonitoringBo.getBusiness(); + if (null != business) { + boolQueryBuilder.filter(QueryBuilders.termQuery(BUSINESS_KEY, business.getBizCode())); + } + /**添加实验Id查询条件**/ + String businessGroupId = logMonitoringBo.getBusinessGroupId(); + if (StringUtils.isNotEmpty(businessGroupId)) { + boolQueryBuilder.filter(QueryBuilders.termQuery(BUSINESS_GROUP_ID_KEY, businessGroupId)); + } + /**添加关键字查询条件**/ + String 
logKeyword = logMonitoringBo.getLogKeyword(); + if (StringUtils.isNotEmpty(logKeyword)) { + boolQueryBuilder.filter(QueryBuilders.matchQuery(MESSAGE, logKeyword).operator(Operator.AND)); + } + /**添加时间范围查询条件**/ + Long beginTimeMillis = logMonitoringBo.getBeginTimeMillis(); + Long endTimeMillis = logMonitoringBo.getEndTimeMillis(); + if (beginTimeMillis != null || endTimeMillis != null){ + beginTimeMillis = beginTimeMillis == null ? ZERO_LONG : beginTimeMillis; + endTimeMillis = endTimeMillis == null ? System.currentTimeMillis() : endTimeMillis; + + /**将毫秒值转换为UTC时间**/ + String beginUtcTime = TimeTransferUtil.dateTransferToUtc(new Date(beginTimeMillis)); + String endUtcTime = TimeTransferUtil.dateTransferToUtc(new Date(endTimeMillis)); + boolQueryBuilder.filter(QueryBuilders.rangeQuery(TIMESTAMP).gte(beginUtcTime).lte(endUtcTime)); + } + + + /**设置boolQueryBuilder到searchSourceBuilder**/ + searchSourceBuilder.query(boolQueryBuilder); + + return searchRequest.source(searchSourceBuilder); + } + + } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java index 2e3ee27..dfb64b9 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java @@ -29,17 +29,27 @@ import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.constant.SymbolConstant; import org.dubhe.biz.base.functional.StringFormat; import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.base.vo.UserAllotVO; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.k8s.api.MetricsApi; import org.dubhe.k8s.api.PodApi; +import org.dubhe.k8s.config.PromethuesConfig; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.bo.PrometheusMetricBO; import org.dubhe.k8s.domain.dto.PodQueryDTO; import 
org.dubhe.k8s.domain.resource.BizContainer; import org.dubhe.k8s.domain.resource.BizPod; import org.dubhe.k8s.domain.resource.BizQuantity; -import org.dubhe.k8s.domain.vo.*; +import org.dubhe.k8s.domain.vo.GpuMetricsDataResultVO; +import org.dubhe.k8s.domain.vo.GpuTotalMemResultVO; +import org.dubhe.k8s.domain.vo.GpuUsageVO; +import org.dubhe.k8s.domain.vo.GpuValueVO; +import org.dubhe.k8s.domain.vo.MetricsDataResultValueVO; +import org.dubhe.k8s.domain.vo.PodRangeMetricsVO; +import org.dubhe.k8s.domain.vo.PtContainerMetricsVO; +import org.dubhe.k8s.domain.vo.PtNodeMetricsVO; +import org.dubhe.k8s.domain.vo.PtPodsVO; import org.dubhe.k8s.utils.BizConvertUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.PrometheusUtil; @@ -47,7 +57,11 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.util.CollectionUtils; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.stream.Collectors; /** @@ -118,6 +132,9 @@ public class MetricsApiImpl implements MetricsApi { @Value("${k8s.prometheus.gpu-mem-use-range-query-param}") private String k8sPrometheusGpuMemUseRangeQueryParam; + @Autowired + private PromethuesConfig promethuesConfig; + public MetricsApiImpl(K8sUtils k8sUtils) { this.client = k8sUtils.getClient(); } @@ -552,4 +569,69 @@ public class MetricsApiImpl implements MetricsApi { return Collections.EMPTY_LIST; } } + + /** + * 查询某个节点的gpu使用率 + * + * + * @return List 节点gpu使用率集合 + */ + @Override + public Map> getNodeGpuUsage() { + Map> result = PrometheusUtil.getPrometheusQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getQueryNodeParamMap(promethuesConfig.getK8sPrometheusGpuUsageQueryParam())); + return result; + } + + /** + * 查询用户资源使用率峰值 + * + * @param resourceType 资源类型(gpu、cpu、memory) + * @param sumDay 查询时间段 + * @return List 
用户资源使用率VO实体类 + */ + @Override + public List getNamespaceUsageRate(Integer resourceType, String sumDay) { + PrometheusMetricBO prometheusMetricBO =new PrometheusMetricBO(); + switch (resourceType) { + case 1: + //GPU + prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getResourceUsageRateParamMap(promethuesConfig.getK8sGpuUsageRateQueryParam(), sumDay)); + break; + case 2: + //CPU + prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getResourceUsageRateParamMap(promethuesConfig.getK8sCpuUsageRateQueryParam(), sumDay)); + break; + case 3: + prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getResourceUsageRateParamMap(promethuesConfig.getK8sMemUsageRateQueryParam(), sumDay)); + break; + default: + break; + } + + if (prometheusMetricBO == null) { + return null; + } + return prometheusMetricBO.getUsageRateResults(); + } + + @Override + public Map getResourceUsageByUser(Integer resourceType, String sumDay, String namespaces) { + PrometheusMetricBO prometheusMetricBO = new PrometheusMetricBO(); + switch (resourceType) { + //GPU + case 1: + prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getResourceUsageParamMap(promethuesConfig.getK8sGpuUsageByNamespaceQueryParam(), sumDay, namespaces)); + break; + //CPU + case 2: + prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getResourceUsageParamMap(promethuesConfig.getK8sCpuUsageByNamespaceQueryParam(), sumDay, namespaces)); + break; + //memory + case 3: + prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getResourceUsageParamMap(promethuesConfig.getK8sMemUsageByNamespaceQueryParam(), sumDay, namespaces)); + break; + + } + return prometheusMetricBO.getResourceUsageResults(); + } } diff --git 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java index 8062e9d..99c37c0 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java @@ -47,6 +47,7 @@ import org.dubhe.k8s.api.ResourceQuotaApi; import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.PtModelOptimizationJobBO; import org.dubhe.k8s.domain.bo.PtMountDirBO; import org.dubhe.k8s.domain.bo.PtPersistentVolumeClaimBO; @@ -60,9 +61,11 @@ import org.dubhe.k8s.enums.LimitsOfResourcesEnum; import org.dubhe.k8s.enums.RestartPolicyEnum; import org.dubhe.k8s.enums.ShellCommandEnum; import org.dubhe.k8s.utils.BizConvertUtils; +import org.dubhe.k8s.utils.K8sCommonUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.LabelUtils; import org.dubhe.k8s.utils.YamlUtils; +import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import javax.annotation.Resource; @@ -92,6 +95,9 @@ public class ModelOptJobApiImpl implements ModelOptJobApi { @Autowired private ResourceIisolationApi resourceIisolationApi; + @Autowired + private K8sCommonUtils k8sCommonUtils; + public ModelOptJobApiImpl(K8sUtils k8sUtils) { this.k8sUtils = k8sUtils; this.client = k8sUtils.getClient(); @@ -106,11 +112,15 @@ public class ModelOptJobApiImpl implements ModelOptJobApi { @Override public BizJob create(PtModelOptimizationJobBO bo) { try { - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, 
baseResourceBo); + + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { return new BizJob().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } - LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + LackOfResourcesEnum lack = nodeApi.isAllocatable(baseResourceBo); if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) { return new BizJob().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } @@ -183,7 +193,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi { */ @Override public PtBaseResult deleteByResourceName(String namespace, String resourceName) { - LogUtil.info(LogEnum.BIZ_K8S, "Param of deleteByResourceName namespace {} resourceName {}", namespace,resourceName); + LogUtil.info(LogEnum.BIZ_K8S, "Param of deleteByResourceName namespace {} resourceName {}", namespace, resourceName); if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(resourceName)) { return new PtBaseResult().baseErrorBadRequest(); } @@ -231,6 +241,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi { Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); + k8sCommonUtils.addRdmaResource(resourcesLimitsMap); this.businessLabel = bo.getBusinessLabel(); this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); this.fsMounts = bo.getFsMounts(); diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java 
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java index 40058a0..a39298a 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java @@ -54,6 +54,7 @@ import org.dubhe.k8s.api.ResourceQuotaApi; import org.dubhe.k8s.api.VolumeApi; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.BuildIngressBO; import org.dubhe.k8s.domain.bo.BuildFsVolumeBO; import org.dubhe.k8s.domain.bo.BuildServiceBO; @@ -67,10 +68,12 @@ import org.dubhe.k8s.enums.LimitsOfResourcesEnum; import org.dubhe.k8s.enums.RestartPolicyEnum; import org.dubhe.k8s.enums.ShellCommandEnum; import org.dubhe.k8s.utils.BizConvertUtils; +import org.dubhe.k8s.utils.K8sCommonUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.LabelUtils; import org.dubhe.k8s.utils.ResourceBuildUtils; import org.dubhe.k8s.utils.YamlUtils; +import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; @@ -102,6 +105,9 @@ public class ModelServingApiImpl implements ModelServingApi { @Autowired private ResourceIisolationApi resourceIisolationApi; + @Autowired + private K8sCommonUtils k8sCommonUtils; + @Value("${k8s.serving.host}") String servingHost; @@ -126,8 +132,12 @@ public class ModelServingApiImpl implements ModelServingApi { @Override public ModelServingVO create(ModelServingBO bo) { try { + + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, baseResourceBo); + baseResourceBo.setName(bo.getResourceName()); //资源配额校验 - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + LimitsOfResourcesEnum 
limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { return new ModelServingVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } @@ -216,9 +226,9 @@ public class ModelServingApiImpl implements ModelServingApi { @Override public PtBaseResult delete(String namespace, String resourceName) { try { - LogUtil.info(LogEnum.BIZ_K8S, "delete model serving namespace:{} resourceName:{}",namespace,resourceName); + LogUtil.info(LogEnum.BIZ_K8S, "delete model serving namespace:{} resourceName:{}", namespace, resourceName); DeploymentList deploymentList = client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list(); - if (deploymentList == null || deploymentList.getItems().size() == 0){ + if (deploymentList == null || deploymentList.getItems().size() == 0) { return new PtBaseResult(); } Boolean res = client.extensions().ingresses().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete() @@ -266,7 +276,7 @@ public class ModelServingApiImpl implements ModelServingApi { * @return Deployment */ private Deployment buildDeployment(ModelServingBO bo, VolumeVO volumeVO, String deploymentName) { - Map childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(),bo.getTaskIdentifyLabel()); + Map childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(), bo.getTaskIdentifyLabel()); LabelSelector labelSelector = new LabelSelector(); labelSelector.setMatchLabels(childLabels); return new DeploymentBuilder() @@ -307,6 +317,7 @@ public class ModelServingApiImpl implements ModelServingApi { Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), 
K8sParamConstants.CPU_UNIT))); Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); + k8sCommonUtils.addRdmaResource(resourcesLimitsMap); Container container = new ContainerBuilder() .withNewName(name) .withNewImage(bo.getImage()) diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java index 96d17c1..72d5533 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java @@ -24,8 +24,9 @@ import io.fabric8.kubernetes.api.model.NamespaceList; import io.fabric8.kubernetes.api.model.ResourceQuota; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientException; -import org.dubhe.biz.base.service.UserContextService; +import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.k8s.annotation.K8sValidation; import org.dubhe.k8s.api.NamespaceApi; import org.dubhe.k8s.api.ResourceQuotaApi; @@ -36,18 +37,11 @@ import org.dubhe.k8s.enums.ValidationTypeEnum; import org.dubhe.k8s.utils.BizConvertUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.LabelUtils; -import org.dubhe.biz.log.utils.LogUtil; -import org.dubhe.biz.base.utils.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.util.CollectionUtils; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import 
java.util.Map; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; /** @@ -61,19 +55,17 @@ public class NamespaceApiImpl implements NamespaceApi { @Autowired private ResourceQuotaApi resourceQuotaApi; - @Autowired - private UserContextService userContextService; - @Value("${user.config.cpu-limit}") private Integer cpuLimit; @Value("${user.config.memory-limit}") private Integer memoryLimit; - @Value("${user.config.gpu-limit}") - private Integer gpuLimit; - + @Value("${user.config.gpu-limit.k8s-label-key}") + private String k8sLabelKey; + @Value("${user.config.gpu-limit.gpu-num-limit}") + private Integer gpuNumLimit; public NamespaceApiImpl(K8sUtils k8sUtils) { this.client = k8sUtils.getClient(); @@ -90,15 +82,17 @@ public class NamespaceApiImpl implements NamespaceApi { public BizNamespace create(@K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) String namespace, Map labels) { try { BizNamespace bizNamespace = get(namespace); - if (bizNamespace != null){ + if (bizNamespace != null) { return bizNamespace; } Namespace ns = new NamespaceBuilder().withNewMetadata().withName(namespace).addToLabels(LabelUtils.getBaseLabels(namespace, labels)).endMetadata().build(); Namespace res = client.namespaces().create(ns); - resourceQuotaApi.create(res.getMetadata().getName(),res.getMetadata().getName(),cpuLimit,memoryLimit,gpuLimit); + Map gpuLimit = new HashMap<>(1); + gpuLimit.put(k8sLabelKey, gpuNumLimit); + resourceQuotaApi.create(res.getMetadata().getName(), res.getMetadata().getName(), cpuLimit, memoryLimit, gpuLimit); return BizConvertUtils.toBizNamespace(res); } catch (KubernetesClientException e) { - LogUtil.error(LogEnum.BIZ_K8S, "NamespaceApiImpl.create error, param:[namespace]={}, [labels]={},error:{}",namespace, labels, e); + LogUtil.error(LogEnum.BIZ_K8S, "NamespaceApiImpl.create error, param:[namespace]={}, [labels]={},error:{}", namespace, labels, e); return new BizNamespace().error(String.valueOf(e.getCode()), e.getMessage()); } } @@ 
-294,7 +288,7 @@ public class NamespaceApiImpl implements NamespaceApi { client.namespaces().withName(namespace).edit().editMetadata().addToLabels(labels).endMetadata().done(); return new PtBaseResult(); } catch (KubernetesClientException e) { - LogUtil.error(LogEnum.BIZ_K8S, "NamespaceApiImpl.addLabels error, param:[namespace]={}, [labels]={},error:{}",namespace, JSON.toJSONString(labels), e); + LogUtil.error(LogEnum.BIZ_K8S, "NamespaceApiImpl.addLabels error, param:[namespace]={}, [labels]={},error:{}", namespace, JSON.toJSONString(labels), e); return new PtBaseResult(String.valueOf(e.getCode()), e.getMessage()); } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NodeApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NodeApiImpl.java index f362822..810680c 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NodeApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NodeApiImpl.java @@ -29,7 +29,6 @@ import io.fabric8.kubernetes.api.model.Toleration; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientException; import org.dubhe.biz.base.constant.MagicNumConstant; -import org.dubhe.biz.base.constant.NumberConstant; import org.dubhe.biz.base.service.UserContextService; import org.dubhe.biz.base.utils.SpringContextHolder; import org.dubhe.biz.base.utils.StringUtils; @@ -40,6 +39,7 @@ import org.dubhe.k8s.api.NodeApi; import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.resource.BizNode; import org.dubhe.k8s.domain.resource.BizTaint; import org.dubhe.k8s.domain.vo.PtNodeMetricsVO; @@ -53,7 +53,13 @@ import org.dubhe.k8s.utils.ResourceBuildUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.util.CollectionUtils; 
-import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** @@ -158,7 +164,7 @@ public class NodeApiImpl implements NodeApi { client.nodes().withName(nodeName).edit().editMetadata().addToLabels(labels).endMetadata().done(); return new PtBaseResult(); } catch (KubernetesClientException e) { - LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.addLabels error, param:[nodeName]={}, [labels]={},error:{}",nodeName, JSON.toJSONString(labels), e); + LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.addLabels error, param:[nodeName]={}, [labels]={},error:{}", nodeName, JSON.toJSONString(labels), e); return new PtBaseResult(String.valueOf(e.getCode()), e.getMessage()); } } @@ -180,7 +186,7 @@ public class NodeApiImpl implements NodeApi { client.nodes().withName(nodeName).edit().editMetadata().removeFromLabels(labelKey).endMetadata().done(); return new PtBaseResult(); } catch (KubernetesClientException e) { - LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.deleteLabel error, param:[nodeName]={}, [labelKey]={},error:{}",nodeName, labelKey, e); + LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.deleteLabel error, param:[nodeName]={}, [labelKey]={},error:{}", nodeName, labelKey, e); return new PtBaseResult(String.valueOf(e.getCode()), e.getMessage()); } } @@ -223,16 +229,16 @@ public class NodeApiImpl implements NodeApi { public List getWithLabel(String key, String value) { try { List bizNodes = new ArrayList<>(); - if (StringUtils.isEmpty(key) || StringUtils.isEmpty(value)){ + if (StringUtils.isEmpty(key) || StringUtils.isEmpty(value)) { return bizNodes; } - NodeList nodeList = client.nodes().withLabel(key,value).list(); - if (nodeList != null && !CollectionUtils.isEmpty(nodeList.getItems())){ + NodeList nodeList = client.nodes().withLabel(key, value).list(); + if (nodeList != null && 
!CollectionUtils.isEmpty(nodeList.getItems())) { return BizConvertUtils.toBizNodes(nodeList.getItems()); } return bizNodes; } catch (KubernetesClientException e) { - LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.getWithLabels error, param:[key]={} [value]={},error:{}", key,value, e); + LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.getWithLabels error, param:[key]={} [value]={},error:{}", key, value, e); return new ArrayList<>(); } } @@ -247,11 +253,11 @@ public class NodeApiImpl implements NodeApi { public List getWithLabels(Map labels) { try { List bizNodes = new ArrayList<>(); - if (CollectionUtils.isEmpty(labels)){ + if (CollectionUtils.isEmpty(labels)) { return bizNodes; } NodeList nodeList = client.nodes().withLabels(labels).list(); - if (nodeList != null && !CollectionUtils.isEmpty(nodeList.getItems())){ + if (nodeList != null && !CollectionUtils.isEmpty(nodeList.getItems())) { return BizConvertUtils.toBizNodes(nodeList.getItems()); } return bizNodes; @@ -293,27 +299,25 @@ public class NodeApiImpl implements NodeApi { * * @param nodeSelector 节点选择标签 * @param taints 该资源所能容忍的污点 - * @param cpuNum 单位为m 1核等于1000m - * @param memNum 单位为Mi 1Mi等于1024Ki - * @param gpuNum 单位为显卡,即"1"表示1张显卡 + * @param baseResourceBo 资源通用属性基类 * @return LackOfResourcesEnum 资源缺乏枚举类 */ @Override - public LackOfResourcesEnum isAllocatable(Map nodeSelector, List taints, Integer cpuNum, Integer memNum, Integer gpuNum) { - LogUtil.info(LogEnum.BIZ_K8S, "Input nodeSelector={};taints={};cpuNum={};memNum={};gpuNum={}", JSON.toJSONString(nodeSelector), JSON.toJSONString(taints), cpuNum, memNum, gpuNum); + public LackOfResourcesEnum isAllocatable(Map nodeSelector, List taints, BaseResourceBo baseResourceBo) { + LogUtil.info(LogEnum.BIZ_K8S, "Input nodeSelector={};taints={};cpuNum={};memNum={};gpuNum={}", JSON.toJSONString(nodeSelector), JSON.toJSONString(taints), baseResourceBo.getCpuNum() + , baseResourceBo.getMemNum(), baseResourceBo.getGpuNum()); NodeList list; try { list = client.nodes().list(); - 
}catch (KubernetesClientException e) { + } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.isAllocatable error:{}", e); return LackOfResourcesEnum.LACK_OF_NODE; } List nodeItems = list.getItems(); //根据nodeSelector筛选节点 - if (CollectionUtil.isNotEmpty(nodeSelector) && nodeSelector.size() > NumberConstant.NUMBER_1){ - return LackOfResourcesEnum.LACK_OF_NODE; - } else if (CollectionUtil.isNotEmpty(nodeSelector) && nodeSelector.size() == NumberConstant.NUMBER_1){ + + if (CollectionUtil.isNotEmpty(nodeSelector)) { for (String nodeSelectorKey : nodeSelector.keySet()) { String nodeSelectorValue = nodeSelector.get(nodeSelectorKey); nodeItems = nodeItems.stream().filter(nodeItem -> nodeSelectorValue.equals(nodeItem.getMetadata().getLabels().get(nodeSelectorKey))) @@ -321,12 +325,12 @@ public class NodeApiImpl implements NodeApi { } } //根据可容忍的污点筛选节点 - if (CollectionUtil.isEmpty(taints)){ + if (CollectionUtil.isEmpty(taints)) { nodeItems = nodeItems.stream().filter(nodeItem -> CollectionUtils.isEmpty(nodeItem.getSpec().getTaints())).collect(Collectors.toList()); } else { List taintNodes = new ArrayList<>(); for (BizTaint taint : taints) { - taintNodes.addAll( nodeItems.stream().filter(nodeItem -> doesTaintExit(nodeItem, taint)).collect(Collectors.toList())); + taintNodes.addAll(nodeItems.stream().filter(nodeItem -> doesTaintExit(nodeItem, taint)).collect(Collectors.toList())); } nodeItems = taintNodes; } @@ -334,23 +338,26 @@ public class NodeApiImpl implements NodeApi { if (CollectionUtils.isEmpty(nodeItems)) { return LackOfResourcesEnum.LACK_OF_NODE; } - if (cpuNum != null && cpuNum >= MagicNumConstant.ZERO) { - nodeItems = isCpuAllocatable(cpuNum, nodeItems); + if (baseResourceBo.getCpuNum() != null && baseResourceBo.getCpuNum() >= MagicNumConstant.ZERO) { + nodeItems = isCpuAllocatable(baseResourceBo.getCpuNum(), nodeItems); if (CollectionUtils.isEmpty(nodeItems)) { + LogUtil.info(LogEnum.BIZ_K8S, "isAllocatable "+ 
LackOfResourcesEnum.LACK_OF_CPU.getMessage()+" nodeSelector={} taints={} baseResourceBo={}",nodeSelector,taints,baseResourceBo); return LackOfResourcesEnum.LACK_OF_CPU; } } - if (memNum != null && memNum >= MagicNumConstant.ZERO) { - nodeItems = isMemAllocatable(memNum, nodeItems); + if (baseResourceBo.getMemNum() != null && baseResourceBo.getMemNum() >= MagicNumConstant.ZERO) { + nodeItems = isMemAllocatable(baseResourceBo.getMemNum(), nodeItems); if (CollectionUtils.isEmpty(nodeItems)) { + LogUtil.info(LogEnum.BIZ_K8S, "isAllocatable "+ LackOfResourcesEnum.LACK_OF_MEM.getMessage()+" nodeSelector={} taints={} baseResourceBo={}",nodeSelector,taints,baseResourceBo); return LackOfResourcesEnum.LACK_OF_MEM; } } - if (gpuNum != null && gpuNum >= MagicNumConstant.ZERO) { - nodeItems = isGpuAllocatable(gpuNum, nodeItems); + if (baseResourceBo.getGpuNum() != null && baseResourceBo.getGpuNum() > MagicNumConstant.ZERO) { + nodeItems = isGpuAllocatable(baseResourceBo, nodeItems); if (CollectionUtils.isEmpty(nodeItems)) { + LogUtil.info(LogEnum.BIZ_K8S, "isAllocatable "+ LackOfResourcesEnum.LACK_OF_GPU.getMessage()+" nodeSelector={} taints={} baseResourceBo={}",nodeSelector,taints,baseResourceBo); return LackOfResourcesEnum.LACK_OF_GPU; } } @@ -361,37 +368,62 @@ public class NodeApiImpl implements NodeApi { /** * 查询集群资源是否充足 * - * @param cpuNum 单位为m 1核等于1000m - * @param memNum 单位为Mi 1Mi等于1024Ki - * @param gpuNum 单位为显卡,即"1"表示1张显卡 + * @param baseResourceBo 单位为m 资源通用属性基类 * @return LackOfResourcesEnum 资源缺乏枚举类 */ @Override - public LackOfResourcesEnum isAllocatable(Integer cpuNum, Integer memNum, Integer gpuNum) { + public LackOfResourcesEnum isAllocatable(BaseResourceBo baseResourceBo) { Toleration toleration = getNodeIsolationToleration(); - if (toleration == null){ - return isAllocatable(null,null,cpuNum,memNum,gpuNum); - }else { - return isAllocatable(getNodeIsolationNodeSelector(), geBizTaintListByUserId(),cpuNum,memNum,gpuNum); + Map nodeSelecter = new HashMap<>(); + + if 
(StrUtil.isNotEmpty(baseResourceBo.getGpuModel())) { + nodeSelecter.put(K8sLabelConstants.NODE_GPU_MODEL_LABEL_KEY, baseResourceBo.getGpuModel()); + } + + if (toleration == null) { + return isAllocatable(nodeSelecter, null, baseResourceBo); + } else { + nodeSelecter.putAll(getNodeIsolationNodeSelector()); + return isAllocatable(nodeSelecter, geBizTaintListByUserId(), baseResourceBo); } } /** + * 查询集群资源是否充足 + * + * @param namespace 命名空间 + * @param cpuNum cpu限制 单位核 0表示不限制 + * @param memNum 内存限制 单位G 0表示不限制 + * @param k8sLabelKey k8s GPU资源标签key值(例如:nvidia.com/gpu) + * @param gpuNum GPU数量,0表示共享显卡,null表示不使用显卡 + * @param gpuModel gpu型号 + * @return LackOfResourcesEnum 资源缺乏枚举类 + */ + @Override + public LackOfResourcesEnum isAllocatableConvert(String namespace, Integer cpuNum, Integer memNum, Boolean useGpu, String k8sLabelKey, String gpuModel, Integer gpuNum) { + BaseResourceBo bo = new BaseResourceBo(); + bo.setNamespace(namespace).setCpuNum(cpuNum).setMemNum(memNum).setUseGpu(useGpu).setK8sLabelKey(k8sLabelKey).setGpuModel(gpuModel).setGpuNum(gpuNum); + return isAllocatable(bo); + } + + + /** * 判断是否超出总可分配gpu数 - * @param gpuNum + * @param gpuNum gpu数量 + * @param gpuModel gpu型号 * @return LackOfResourcesEnum 资源缺乏枚举类 */ @Override - public LackOfResourcesEnum isOutOfTotalAllocatableGpu(Integer gpuNum){ - Integer remainingGpuNum = getTotalGpuNum() - getAllocatedGpuNum(); - if (gpuNum > remainingGpuNum){ + public LackOfResourcesEnum isOutOfTotalAllocatableGpu(String k8sLabelKey, String gpuModel, Integer gpuNum) { + Integer remainingGpuNum = getTotalGpuNum(k8sLabelKey, gpuModel) - getAllocatedGpuNum(k8sLabelKey, gpuModel); + if (gpuNum > remainingGpuNum) { return LackOfResourcesEnum.LACK_OF_GPU; - }else { + } else { return LackOfResourcesEnum.ADEQUATE; } } - /** + /** * 添加污点 * * @param nodeName 节点名称 @@ -401,23 +433,23 @@ public class NodeApiImpl implements NodeApi { @Override public BizNode taint(String nodeName, List bizTaintList) { try { - if (StringUtils.isEmpty(nodeName) || 
org.springframework.util.CollectionUtils.isEmpty(bizTaintList)){ + if (StringUtils.isEmpty(nodeName) || org.springframework.util.CollectionUtils.isEmpty(bizTaintList)) { return new BizNode().errorBadRequest(); } Node nodeInfo = client.nodes().withName(nodeName).get(); - if (nodeInfo == null){ - return new BizNode().error(K8sResponseEnum.NOT_FOUND.getCode(), "节点["+nodeName+"]不存在"); + if (nodeInfo == null) { + return new BizNode().error(K8sResponseEnum.NOT_FOUND.getCode(), "节点[" + nodeName + "]不存在"); } List oldTaints = nodeInfo.getSpec().getTaints(); - for (Taint taint : oldTaints){ - if (K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY.equals(taint.getKey())){ - return new BizNode().error(K8sResponseEnum.EXISTS.getCode(),"节点已被占用"); + for (Taint taint : oldTaints) { + if (K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY.equals(taint.getKey())) { + return new BizNode().error(K8sResponseEnum.EXISTS.getCode(), "节点已被占用"); } } Node node = client.nodes().withName(nodeName).edit().editSpec().addAllToTaints(BizConvertUtils.toTaints(bizTaintList)).endSpec().done(); return BizConvertUtils.toBizNode(node); - }catch (KubernetesClientException e) { + } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.schedulable error:{}", e); return new BizNode().error(String.valueOf(e.getCode()), e.getMessage()); } @@ -433,16 +465,16 @@ public class NodeApiImpl implements NodeApi { @Override public BizNode delTaint(String nodeName, List bizTaintList) { try { - if (StringUtils.isEmpty(nodeName) || org.springframework.util.CollectionUtils.isEmpty(bizTaintList)){ + if (StringUtils.isEmpty(nodeName) || org.springframework.util.CollectionUtils.isEmpty(bizTaintList)) { return new BizNode().errorBadRequest(); } Node nodeInfo = client.nodes().withName(nodeName).get(); - if (nodeInfo == null){ - return new BizNode().error(K8sResponseEnum.NOT_FOUND.getCode(), "节点["+nodeName+"]不存在"); + if (nodeInfo == null) { + return new BizNode().error(K8sResponseEnum.NOT_FOUND.getCode(), "节点[" 
+ nodeName + "]不存在"); } Node node = client.nodes().withName(nodeName).edit().editSpec().removeAllFromTaints(BizConvertUtils.toTaints(bizTaintList)).endSpec().done(); return BizConvertUtils.toBizNode(node); - }catch (KubernetesClientException e) { + } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.delTaint error:{}", e); return new BizNode().error(String.valueOf(e.getCode()), e.getMessage()); } @@ -457,30 +489,30 @@ public class NodeApiImpl implements NodeApi { @Override public BizNode delTaint(String nodeName) { try { - if (StringUtils.isEmpty(nodeName)){ + if (StringUtils.isEmpty(nodeName)) { return new BizNode().errorBadRequest(); } Node nodeInfo = client.nodes().withName(nodeName).get(); - if (nodeInfo == null){ - return new BizNode().error(K8sResponseEnum.NOT_FOUND.getCode(), "节点["+nodeName+"]不存在"); + if (nodeInfo == null) { + return new BizNode().error(K8sResponseEnum.NOT_FOUND.getCode(), "节点[" + nodeName + "]不存在"); } List taints = nodeInfo.getSpec().getTaints(); Taint taint = new Taint(); - for (Taint obj : taints){ - if (K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY.equals(obj.getKey())){ + for (Taint obj : taints) { + if (K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY.equals(obj.getKey())) { taint = obj; } } Node node = client.nodes().withName(nodeName) .edit() - .editSpec() - .removeFromTaints(taint) - .endSpec() + .editSpec() + .removeFromTaints(taint) + .endSpec() .done(); return BizConvertUtils.toBizNode(node); - }catch (KubernetesClientException e) { + } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "NodeApiImpl.delTaint error:{}", e); return new BizNode().error(String.valueOf(e.getCode()), e.getMessage()); } @@ -494,8 +526,8 @@ public class NodeApiImpl implements NodeApi { * @return node资源隔离 标志 */ @Override - public String getNodeIsolationValue(Long isolationId){ - return StrUtil.format(K8sLabelConstants.PLATFORM_TAG_ISOLATION_VALUE, SpringContextHolder.getActiveProfile(),isolationId); + public 
String getNodeIsolationValue(Long isolationId) { + return StrUtil.format(K8sLabelConstants.PLATFORM_TAG_ISOLATION_VALUE, SpringContextHolder.getActiveProfile(), isolationId); } /** @@ -505,7 +537,7 @@ public class NodeApiImpl implements NodeApi { */ @Override public String getNodeIsolationValue() { - return StrUtil.format(K8sLabelConstants.PLATFORM_TAG_ISOLATION_VALUE, SpringContextHolder.getActiveProfile(),userContextService.getCurUserId()); + return StrUtil.format(K8sLabelConstants.PLATFORM_TAG_ISOLATION_VALUE, SpringContextHolder.getActiveProfile(), userContextService.getCurUserId()); } /** @@ -515,21 +547,21 @@ public class NodeApiImpl implements NodeApi { */ @Override public Toleration getNodeIsolationToleration() { - List nodes = getWithLabel(K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY,getNodeIsolationValue()); - if (CollectionUtils.isEmpty(nodes)){ + List nodes = getWithLabel(K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY, getNodeIsolationValue()); + if (CollectionUtils.isEmpty(nodes)) { return null; } - return ResourceBuildUtils.buildNoScheduleEqualToleration(K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY,getNodeIsolationValue()); + return ResourceBuildUtils.buildNoScheduleEqualToleration(K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY, getNodeIsolationValue()); } /** * 获取当前用户 资源隔离 NodeSelector - * @return Map + * @return Map */ @Override public Map getNodeIsolationNodeSelector() { - Map nodeSelector = new HashMap<>(MagicNumConstant.TWO); - nodeSelector.put(K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY,getNodeIsolationValue()); + Map nodeSelector = new HashMap<>(MagicNumConstant.TWO); + nodeSelector.put(K8sLabelConstants.PLATFORM_TAG_ISOLATION_KEY, getNodeIsolationValue()); return nodeSelector; } @@ -576,7 +608,7 @@ public class NodeApiImpl implements NodeApi { nodeItems.forEach(nodeItem -> { String nodeName = nodeItem.getMetadata().getName(); List collect = nodeMetrics.stream().filter(nodeMetric -> 
nodeMetric.getNodeName().equals(nodeName)).collect(Collectors.toList()); - if (!CollectionUtils.isEmpty(collect)){ + if (!CollectionUtils.isEmpty(collect)) { String memAmount = collect.get(0).getMemoryUsageAmount(); int memCapacity = Integer.parseInt(nodeItem.getStatus().getCapacity().get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount()) / MagicNumConstant.ONE_THOUSAND; int memAmountInt = Integer.parseInt(memAmount) / MagicNumConstant.BINARY_TEN_EXP; @@ -612,7 +644,7 @@ public class NodeApiImpl implements NodeApi { nodeItems.forEach(nodeItem -> { String nodeName = nodeItem.getMetadata().getName(); List collect = nodeMetrics.stream().filter(nodeMetric -> nodeMetric.getNodeName().equals(nodeName)).collect(Collectors.toList()); - if (!CollectionUtils.isEmpty(collect)){ + if (!CollectionUtils.isEmpty(collect)) { String cpuAmount = collect.get(0).getCpuUsageAmount(); int cpuCapacity = Integer.parseInt(nodeItem.getStatus().getCapacity().get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount()) * MagicNumConstant.ONE_THOUSAND; int cpuAmountInt = (int) (Long.parseLong(cpuAmount) / MagicNumConstant.ONE_THOUSAND / MagicNumConstant.ONE_THOUSAND); @@ -634,25 +666,25 @@ public class NodeApiImpl implements NodeApi { /** * 查询节点Gpu资源是否可分配 * - * @param gpuNum 单位为显卡,即"1"表示1张显卡 + * @param baseResourceBo 资源通用属性基类 * @param nodeItems Node集合 * @return List Node集合 */ - private List isGpuAllocatable(int gpuNum, List nodeItems) { + private List isGpuAllocatable(BaseResourceBo baseResourceBo, List nodeItems) { List nodeItemResults = new ArrayList<>(); List nodeNameList = new ArrayList<>(); - nodeItems = nodeItems.stream().filter(node -> node.getStatus().getCapacity().containsKey(K8sParamConstants.GPU_RESOURCE_KEY)).collect(Collectors.toList()); + nodeItems = nodeItems.stream().filter(node -> node.getStatus().getCapacity().containsKey(baseResourceBo.getK8sLabelKey())).collect(Collectors.toList()); - List podItems = filterRequestGpuPod(); + List podItems = 
filterRequestGpuPod(baseResourceBo.getK8sLabelKey(), baseResourceBo.getGpuModel()); Map allocatableGpu = new HashMap(); for (Node nodeItem : nodeItems) { int totalGpuAmount = 0; - int totalGpu = Integer.parseInt(nodeItem.getStatus().getCapacity().get(K8sParamConstants.GPU_RESOURCE_KEY).getAmount()); + int totalGpu = Integer.parseInt(nodeItem.getStatus().getCapacity().get(baseResourceBo.getK8sLabelKey()).getAmount()); String nodeName = nodeItem.getMetadata().getName(); List nodePodItems = podItems.stream().filter(pod -> pod.getSpec().getNodeName().equals(nodeName)).collect(Collectors.toList()); for (Pod pod : nodePodItems) { - String gpuAmount = pod.getSpec().getContainers().get(0).getResources().getLimits().get(K8sParamConstants.GPU_RESOURCE_KEY).getAmount(); + String gpuAmount = pod.getSpec().getContainers().get(0).getResources().getLimits().get(baseResourceBo.getK8sLabelKey()).getAmount(); totalGpuAmount = totalGpuAmount + Integer.parseInt(gpuAmount); } allocatableGpu.put(nodeName, totalGpu - totalGpuAmount); @@ -661,7 +693,7 @@ public class NodeApiImpl implements NodeApi { Set keySet = allocatableGpu.keySet(); keySet.forEach(key -> { - if (allocatableGpu.get(key) >= gpuNum) { + if (allocatableGpu.get(key) >= baseResourceBo.getGpuNum()) { nodeNameList.add(key); } }); @@ -677,13 +709,14 @@ public class NodeApiImpl implements NodeApi { * 获取申请了gpu的pod列表 * @return */ - private List filterRequestGpuPod(){ + private List filterRequestGpuPod(String K8sLabelKey, String gpuModel) { PodList podList = client.pods().list(); - if (CollectionUtil.isNotEmpty(podList.getItems())){ + if (CollectionUtil.isNotEmpty(podList.getItems())) { return podList.getItems().stream().filter(pod -> pod.getSpec().getContainers().get(0).getResources().getLimits() != null && - pod.getSpec().getContainers().get(0).getResources().getLimits().containsKey(K8sParamConstants.GPU_RESOURCE_KEY) && - pod.getStatus().getPhase().equals(PodPhaseEnum.RUNNING.getPhase())).collect(Collectors.toList()); + 
pod.getSpec().getContainers().get(0).getResources().getLimits().containsKey(K8sLabelKey) && + pod.getStatus().getPhase().equals(PodPhaseEnum.RUNNING.getPhase()) && + pod.getMetadata().getLabels().containsValue(gpuModel)).collect(Collectors.toList()); } return new ArrayList<>(); } @@ -692,10 +725,10 @@ public class NodeApiImpl implements NodeApi { * 查询集群已分配gpu数量 * @return */ - private Integer getAllocatedGpuNum(){ - return filterRequestGpuPod().stream().mapToInt(pod-> + private Integer getAllocatedGpuNum(String k8sLabelKey, String gpuModel) { + return filterRequestGpuPod(k8sLabelKey, gpuModel).stream().mapToInt(pod -> pod.getSpec().getContainers().stream().mapToInt(container -> - Integer.valueOf(String.valueOf(container.getResources().getLimits().get(K8sParamConstants.GPU_RESOURCE_KEY).getAmount()))).sum()) + Integer.valueOf(String.valueOf(container.getResources().getLimits().get(k8sLabelKey).getAmount()))).sum()) .sum(); } @@ -703,17 +736,17 @@ public class NodeApiImpl implements NodeApi { * 查询集群总gpu数量 * @return */ - private Integer getTotalGpuNum(){ + private Integer getTotalGpuNum(String k8sLabelKey, String gpuModel) { return listAll().stream() - .filter(node -> !node.isUnschedulable() && node.getCapacity().containsKey(K8sParamConstants.GPU_RESOURCE_KEY) && CollectionUtils.isEmpty(node.getTaints())) - .mapToInt(node -> Integer.valueOf(String.valueOf(node.getCapacity().get(K8sParamConstants.GPU_RESOURCE_KEY).getAmount()))).sum(); + .filter(node -> !node.isUnschedulable() && node.getLabels().containsValue(gpuModel)) + .mapToInt(node -> Integer.valueOf(String.valueOf(node.getCapacity().get(k8sLabelKey).getAmount()))).sum(); } /** * 查询节点是否存在指定的污点 * @return */ - private boolean doesTaintExit(Node node, BizTaint bizTaint){ + private boolean doesTaintExit(Node node, BizTaint bizTaint) { List taints = node.getSpec().getTaints().stream().filter(taint -> StringUtils.equalsAny(taint.getKey(), bizTaint.getKey()) && StringUtils.equalsAny(taint.getValue(), 
bizTaint.getValue())) diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java index 38bdf13..be1772a 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java @@ -138,7 +138,7 @@ public class PersistentVolumeClaimApiImpl implements PersistentVolumeClaimApi { //创建pv PersistentVolume pv = new PersistentVolumeBuilder() .withNewMetadata().addToLabels(pvLabels).withName(bo.getPvcName() + PV_SUFFIX).endMetadata() - .withNewSpec().addToCapacity(STORAGE, new Quantity(bo.getRequest())).addNewAccessMode(AccessModeEnum.READ_WRITE_ONCE.getType()).withNewPersistentVolumeReclaimPolicy(StringUtils.isNotEmpty(bo.getReclaimPolicy())?PvReclaimPolicyEnum.RECYCLE.getPolicy():bo.getReclaimPolicy()) + .withNewSpec().addToCapacity(STORAGE, new Quantity(bo.getRequest())).addNewAccessMode(AccessModeEnum.READ_WRITE_ONCE.getType()).withNewPersistentVolumeReclaimPolicy(StringUtils.isEmpty(bo.getReclaimPolicy())?PvReclaimPolicyEnum.RECYCLE.getPolicy():bo.getReclaimPolicy()) .withNewHostPath().withNewPath(bo.getPath()).withType(K8sParamConstants.HOST_PATH_TYPE).endHostPath() .endSpec() .build(); @@ -310,6 +310,27 @@ public class PersistentVolumeClaimApiImpl implements PersistentVolumeClaimApi { } /** + * 删除PVC + * + * @param namespace 命名空间 + * @param resourceName 资源名称 + * @return PtBaseResult 基础结果类 + */ + @Override + public PtBaseResult deletePvcByResourceName(String namespace, String resourceName) { + if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(resourceName)) { + return new PtBaseResult().baseErrorBadRequest(); + } + try { + client.persistentVolumeClaims().inNamespace(namespace).withLabel(K8sLabelConstants.BASE_TAG_SOURCE,resourceName).delete(); + return new PtBaseResult(); + } catch 
(KubernetesClientException e) { + LogUtil.error(LogEnum.BIZ_K8S, "PersistentVolumeClaimApiImpl.deletePvcByResourceName error, param:[namespace]={}, error:{}", namespace, e); + return new PtBaseResult(String.valueOf(e.getCode()), e.getMessage()); + } + } + + /** * 回收存储(recycle 的pv才能回收) * * @param namespace 命名空间 diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PodApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PodApiImpl.java index 643ff26..b8c4e6b 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PodApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PodApiImpl.java @@ -18,23 +18,32 @@ package org.dubhe.k8s.api.impl; import cn.hutool.core.collection.CollectionUtil; +import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.IORuntimeException; import cn.hutool.core.util.StrUtil; import cn.hutool.http.HttpRequest; import cn.hutool.http.HttpResponse; import cn.hutool.http.HttpStatus; +import com.alibaba.fastjson.JSON; +import com.google.common.collect.Lists; +import com.google.common.io.Files; import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.PodList; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientException; +import io.fabric8.kubernetes.client.dsl.ExecWatch; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.StringConstant; import org.dubhe.biz.base.constant.SymbolConstant; import org.dubhe.biz.base.utils.RegexUtil; import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.file.utils.IOUtil; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.k8s.api.JupyterResourceApi; import org.dubhe.k8s.api.MetricsApi; import org.dubhe.k8s.api.PodApi; +import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.bo.LabelBO; import 
org.dubhe.k8s.domain.resource.BizPod; @@ -42,13 +51,17 @@ import org.dubhe.k8s.domain.vo.PtJupyterDeployVO; import org.dubhe.k8s.domain.vo.PtPodsVO; import org.dubhe.k8s.enums.K8sResponseEnum; import org.dubhe.k8s.enums.PodPhaseEnum; +import org.dubhe.k8s.listener.DefaultPodExecListener; import org.dubhe.k8s.utils.BizConvertUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.LabelUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.util.CollectionUtils; +import java.io.File; +import java.io.InputStream; import java.util.*; +import java.util.concurrent.CountDownLatch; import java.util.stream.Collectors; /** @@ -312,6 +325,21 @@ public class PodApiImpl implements PodApi { } /** + * 根据resourceName查询Pod集合 + * @param resourceName + * @return + */ + @Override + public List listByResourceName(String resourceName) { + try{ + return client.pods().inAnyNamespace().withLabel(K8sLabelConstants.BASE_TAG_SOURCE, resourceName).list().getItems(); + }catch (KubernetesClientException e) { + LogUtil.error(LogEnum.BIZ_K8S, "PodApiImpl.listByResourceName error:{}", e); + return new ArrayList<>(); + } + } + + /** * 根据命名空间查询Pod集合 * * @param namespace 命名空间 @@ -367,6 +395,18 @@ public class PodApiImpl implements PodApi { return ""; } + + /** + * 根据resourceName 获取pod对应k8s中labels + * + * @param resourceName 资源名称 + * @return Map map + */ + @Override + public Map getLabels(String resourceName){ + return LabelUtils.withEnvResourceName(resourceName); + } + /** * 根据命名空间和资源名获得Token信息 * @@ -414,6 +454,136 @@ public class PodApiImpl implements PodApi { } /** + * 拷贝文件到pod + * @param namespace 命名空间 + * @param podName pod名称 + * @param containerName 容器名称 + * @param file 文件 + * @param targetDir 目标路径 + */ + @Override + public void copyToPod(String namespace, String podName, String containerName, File file, String targetDir) { + try { + LogUtil.info(LogEnum.BIZ_K8S, "PodApiImpl.copyToPod params:[namespace]={}, 
[podName]={},[containerName]={},[file]={},[targetDir]={}",namespace, podName,containerName,file.getAbsolutePath(),targetDir); + client.pods().inNamespace(namespace).withName(podName) + .inContainer(containerName) + .file(targetDir) + .upload(file.toPath()); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "PodApiImpl.copyToPod error, params:[namespace]={}, [podName]={},[containerName]={},[file]={},[targetDir]={}, error:{}",namespace, podName,containerName,file.getAbsolutePath(),targetDir, e); + } + } + + /** + * 同步执行 + * @param namespace 命名空间 + * @param podName pod名称 + * @param containerName 容器名称 + * @param cmd 命令 + */ + @Override + public void exec(String namespace, String podName, String containerName, String cmd) { + try { + LogUtil.info(LogEnum.BIZ_K8S, "PodApiImpl.exec params:[namespace]={}, [podName]={},[containerName]={},[cmd]={}",namespace, podName,containerName,cmd); + final CountDownLatch execLatch = new CountDownLatch(1); + ExecWatch execWatch = client.pods().inNamespace(namespace).withName(podName).inContainer(containerName) + .redirectingOutput() + .withTTY() //不展示输出 + .usingListener(new DefaultPodExecListener(namespace, podName, containerName, execLatch)) + .exec("sh", "-c", cmd); + execLatch.await(); + } catch (InterruptedException e) { + LogUtil.error(LogEnum.BIZ_K8S, "PodApiImpl.exec error,params:[namespace]={}, [podName]={},[containerName]={},[cmd]={},error:{}",namespace, podName,containerName,cmd,e); + } + } + + /** + * 设置pod间 ssh免密登录 + * @param podList pod 列表 + */ + @Override + public void sshAuthentication(List podList) { + if (CollectionUtils.isEmpty(podList) || podList.size() == MagicNumConstant.ONE) { + return; + } + LogUtil.info(LogEnum.BIZ_K8S, "PodApiImpl.sshAuthentication params:[podList]={}", podList.stream().map(p->p.getMetadata().getName()).collect(Collectors.toList())); + File tempDir = Files.createTempDir(); + try ( + InputStream isRsa = getClass().getClassLoader().getResourceAsStream("key/id_rsa"); + InputStream isRsaPub 
= getClass().getClassLoader().getResourceAsStream("key/id_rsa.pub") + ) { + //id_rsa + File tempIdRsa = FileUtil.createTempFile(tempDir); + IOUtil.copy(isRsa, tempIdRsa); + //id_rsa.pub + File tempIdRsaPub = FileUtil.createTempFile(tempDir); + IOUtil.copy(isRsaPub, tempIdRsaPub); + List pubLines = FileUtil.readLines(tempIdRsaPub, StringConstant.UTF8); + String pubKeyContent = pubLines.get(0); + //按机器修改id_rsa.pub, 并组装一个大而全的authorized_keys + List idRsaPubFiles = Lists.newArrayList(); + File tempAuthorizedKeys = FileUtil.createTempFile(tempDir); + List pubKeys = Lists.newArrayList(); + for (int i = 0; i < podList.size(); i++) { + Pod podInfo = podList.get(i); + String podPubKeyContent = pubKeyContent.replace("{{ip}}", podInfo.getStatus().getPodIP()); + File tempIdRsaPubOnPod = FileUtil.createTempFile(tempDir); + FileUtil.writeLines(Collections.singletonList(podPubKeyContent), tempIdRsaPubOnPod, StringConstant.UTF8); + idRsaPubFiles.add(tempIdRsaPubOnPod); + pubKeys.add(podPubKeyContent); + } + FileUtil.writeLines(pubKeys, tempAuthorizedKeys, StringConstant.UTF8); + + //获得所有pod, 上传三个文件 + for (int i = 0; i < podList.size(); i++) { + Pod pod = podList.get(i); + String containerName = pod.getSpec().getContainers().get(MagicNumConstant.ZERO).getName(); + String namespace = pod.getMetadata().getNamespace(); + //上传id_rsa + copyToPod(namespace, pod.getMetadata().getName(), containerName, tempIdRsa, "/root/.ssh/id_rsa"); + //上传id_rsa.pub + File tempIdRsaPubOnPod = idRsaPubFiles.get(i); + copyToPod(namespace, pod.getMetadata().getName(), containerName, tempIdRsaPubOnPod, "/root/.ssh/id_rsa.pub"); + //上传authorized_keys + copyToPod(namespace, pod.getMetadata().getName(), containerName, tempAuthorizedKeys, "/root/.ssh/authorized_keys"); + //修改权限 + String chmodCmd = StrUtil.format("chmod 644 /root/.ssh/authorized_keys && chmod 600 /root/.ssh/id_rsa && chmod 644 /root/.ssh/id_rsa.pub"); + exec(pod.getMetadata().getNamespace(), pod.getMetadata().getName(), containerName, chmodCmd); + 
} + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "PodApiImpl.sshAuthentication error,params:[podList]={},error:{}", podList, e); + } finally { + //清理临时文件 + FileUtil.del(tempDir); + } + } + + /** + * 设置pod NODE_IPS 环境变量为 pod ip 列表 + * @param podList pod 列表 + */ + @Override + public void setNodeIpsEnv(List podList) { + if (CollectionUtils.isEmpty(podList) || podList.size() == MagicNumConstant.ONE){ + return; + } + LogUtil.info(LogEnum.BIZ_K8S, "PodApiImpl.setNodeIpsEnv params:[podList]={}", podList.stream().map(p->p.getMetadata().getName()).collect(Collectors.toList())); + List nodeIpList = new ArrayList<>(); + for (int i = 0; i < podList.size(); i++){ + nodeIpList.add(podList.get(i).getStatus().getPodIP()); + } + String nodeIps = JSON.toJSONString(nodeIpList); + LogUtil.info(LogEnum.BIZ_K8S, "PodApiImpl.setNodeIpsEnv nodeIps={}", nodeIps); + for (int i = 0; i < podList.size(); i++) { + Pod pod = podList.get(i); + String containerName = pod.getSpec().getContainers().get(MagicNumConstant.ZERO).getName(); + //设置 NODE_IPS 环境变量 + String chmodCmd = StrUtil.format("echo 'export NODE_IPS="+nodeIps+"' >> /root/.bashrc"); + exec(pod.getMetadata().getNamespace(), pod.getMetadata().getName(), containerName, chmodCmd); + } + } + + /** * 验证访问Notebook的url * * @param jupyterUrl 访问Notebook的url diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java index 5136e81..7611f7b 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java @@ -19,13 +19,7 @@ package org.dubhe.k8s.api.impl; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; -import io.fabric8.kubernetes.api.model.Quantity; -import io.fabric8.kubernetes.api.model.ResourceQuota; -import 
io.fabric8.kubernetes.api.model.ResourceQuotaBuilder; -import io.fabric8.kubernetes.api.model.ResourceQuotaList; -import io.fabric8.kubernetes.api.model.ScopeSelector; -import io.fabric8.kubernetes.api.model.ScopeSelectorBuilder; -import io.fabric8.kubernetes.api.model.ScopedResourceSelectorRequirement; +import io.fabric8.kubernetes.api.model.*; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientException; import org.dubhe.biz.base.constant.MagicNumConstant; @@ -36,6 +30,7 @@ import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.k8s.api.ResourceQuotaApi; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.PtResourceQuotaBO; import org.dubhe.k8s.domain.resource.BizQuantity; import org.dubhe.k8s.domain.resource.BizResourceQuota; @@ -72,7 +67,7 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { @Override public BizResourceQuota create(PtResourceQuotaBO bo) { try { - LogUtil.info(LogEnum.BIZ_K8S,"Input bo={}", bo); + LogUtil.info(LogEnum.BIZ_K8S, "Input bo={}", bo); Gson gson = new Gson(); List scopeSelector = gson.fromJson(gson.toJson(bo.getScopeSelector()), new TypeToken>() { }.getType()); @@ -81,21 +76,21 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { hard.put(obj.getKey(), new Quantity(obj.getValue().getAmount(), obj.getValue().getFormat())); } ResourceQuota resourceQuota = null; - if (scopeSelector != null){ + if (scopeSelector != null) { ScopeSelector item = new ScopeSelectorBuilder().addAllToMatchExpressions(scopeSelector).build(); resourceQuota = new ResourceQuotaBuilder().withNewMetadata().withName(bo.getName()).endMetadata() .withNewSpec().withHard(hard).withNewScopeSelectorLike(item).endScopeSelector().endSpec().build(); - }else { + } else { resourceQuota = new ResourceQuotaBuilder().withNewMetadata().withName(bo.getName()).endMetadata() 
.withNewSpec().withHard(hard).endSpec().build(); } BizResourceQuota bizResourceQuota = BizConvertUtils.toBizResourceQuota(client.resourceQuotas().inNamespace(bo.getNamespace()).createOrReplace(resourceQuota)); - LogUtil.info(LogEnum.BIZ_K8S,"Output {}", bizResourceQuota); - return bizResourceQuota; + LogUtil.info(LogEnum.BIZ_K8S, "Output {}", bizResourceQuota); + return bizResourceQuota; } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "ResourceQuotaApiImpl.create error, param:{} error:{}", bo, e); - return new BizResourceQuota().error(String.valueOf(e.getCode()),e.getMessage()); + return new BizResourceQuota().error(String.valueOf(e.getCode()), e.getMessage()); } } @@ -105,35 +100,41 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { * @param name ResourceQuota 名称 * @param cpu cpu限制 单位核 0表示不限制 * @param memory 内存限制 单位Gi 0表示不限制 - * @param gpu gpu限制 单位张 0表示不限制 + * @param gpuLimit gpu限制 * @return */ @Override - public BizResourceQuota create(String namespace, String name, Integer cpu, Integer memory, Integer gpu) { + public BizResourceQuota create(String namespace, String name, Integer cpu, Integer memory, Map gpuLimit) { try { - LogUtil.info(LogEnum.BIZ_K8S,"Input namespace={},name={},cpu={},mem={},gpu={}", namespace,name,cpu,memory,gpu); - if (StringUtils.isEmpty(namespace)){ + LogUtil.info(LogEnum.BIZ_K8S, "Input namespace={},name={},cpu={},mem={},gpu={}", namespace, name, cpu, memory, gpuLimit); + if (StringUtils.isEmpty(namespace)) { return new BizResourceQuota().error(K8sResponseEnum.BAD_REQUEST.getCode(), "namespace is empty"); } - if (cpu == null && memory == null && gpu == null){ + if (cpu == null && memory == null && gpuLimit == null) { return new BizResourceQuota().error(K8sResponseEnum.BAD_REQUEST.getCode(), "cpu mem gpu is empty"); } PtResourceQuotaBO bo = new PtResourceQuotaBO(); bo.setNamespace(namespace); - bo.setName(StringUtils.isEmpty(name)?namespace:namespace); - if (cpu != null && cpu > 0){ + 
bo.setName(StringUtils.isEmpty(name) ? namespace : namespace); + if (cpu != null) { bo.addCpuLimitsHard(String.valueOf(cpu), SymbolConstant.BLANK); } - if (memory > 0){ + if (memory != null) { bo.addMemoryLimitsHard(String.valueOf(memory), K8sParamConstants.MEM_UNIT_GI); } - if (gpu > 0){ - bo.addGpuLimitsHard(String.valueOf(gpu)); + if (!gpuLimit.isEmpty()) { + Map map = new HashMap<>(2); + for (Map.Entry entry : gpuLimit.entrySet()) { + if (entry.getValue() != null) { + map.put(entry.getKey(), String.valueOf(entry.getValue())); + } + } + bo.addGpuLimitsHard(map); } return create(bo); } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "ResourceQuotaApiImpl.create error, param:{} error:{}", e); - return new BizResourceQuota().error(String.valueOf(e.getCode()),e.getMessage()); + return new BizResourceQuota().error(String.valueOf(e.getCode()), e.getMessage()); } } @@ -146,18 +147,18 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { @Override public List list(String namespace) { try { - LogUtil.info(LogEnum.BIZ_K8S,"Input namespace={}", namespace); + LogUtil.info(LogEnum.BIZ_K8S, "Input namespace={}", namespace); if (StringUtils.isEmpty(namespace)) { ResourceQuotaList resourceQuotaList = client.resourceQuotas().inAnyNamespace().list(); return resourceQuotaList.getItems().parallelStream().map(obj -> BizConvertUtils.toBizResourceQuota(obj)).collect(Collectors.toList()); } else { ResourceQuotaList resourceQuotaList = client.resourceQuotas().inNamespace(namespace).list(); List bizResourceQuotaList = resourceQuotaList.getItems().parallelStream().map(obj -> BizConvertUtils.toBizResourceQuota(obj)).collect(Collectors.toList()); - LogUtil.info(LogEnum.BIZ_K8S,"Output {}", bizResourceQuotaList); + LogUtil.info(LogEnum.BIZ_K8S, "Output {}", bizResourceQuotaList); return bizResourceQuotaList; } - }catch (KubernetesClientException e) { - LogUtil.error(LogEnum.BIZ_K8S, "ResourceQuotaApiImpl.list error, param:[namespace]={},error:{}", 
namespace,e); + } catch (KubernetesClientException e) { + LogUtil.error(LogEnum.BIZ_K8S, "ResourceQuotaApiImpl.list error, param:[namespace]={},error:{}", namespace, e); return Collections.EMPTY_LIST; } } @@ -171,61 +172,63 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { */ @Override public PtBaseResult delete(String namespace, String name) { - LogUtil.info(LogEnum.BIZ_K8S,"Input namespace={};name={}", namespace,name); + LogUtil.info(LogEnum.BIZ_K8S, "Input namespace={};name={}", namespace, name); if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(name)) { return new PtBaseResult().baseErrorBadRequest(); } try { - if (client.resourceQuotas().inNamespace(namespace).withName(name).delete()){ + if (client.resourceQuotas().inNamespace(namespace).withName(name).delete()) { return new PtBaseResult(); - }else { + } else { return K8sResponseEnum.REPEAT.toPtBaseResult(); } } catch (KubernetesClientException e) { - LogUtil.error(LogEnum.BIZ_K8S, "ResourceQuotaApiImpl.delete error, param:[namespace]={}, [name]={}, error:{}",namespace, name, e); - return new PtBaseResult(String.valueOf(e.getCode()),e.getMessage()); + LogUtil.error(LogEnum.BIZ_K8S, "ResourceQuotaApiImpl.delete error, param:[namespace]={}, [name]={}, error:{}", namespace, name, e); + return new PtBaseResult(String.valueOf(e.getCode()), e.getMessage()); } } /** * 判断资源是否达到限制 * - * @param cpuNum 单位为m 1核等于1000m - * @param memNum 单位为Mi 1Mi等于1024Ki - * @param gpuNum 单位为显卡,即"1"表示1张显卡 + * @param baseResourceBo 资源通用属性基类 * @return LimitsOfResourcesEnum 资源超限枚举类 */ @Override - public LimitsOfResourcesEnum reachLimitsOfResources(String namespace,Integer cpuNum, Integer memNum, Integer gpuNum) { - if (StringUtils.isEmpty(namespace)){ + public LimitsOfResourcesEnum reachLimitsOfResources(BaseResourceBo baseResourceBo) { + LogUtil.info(LogEnum.BIZ_K8S, "reachLimitsOfResources baseResourceBo={}", baseResourceBo); + if (StringUtils.isEmpty(baseResourceBo.getNamespace())) { return 
LimitsOfResourcesEnum.ADEQUATE; } - List bizResourceQuotas = list(namespace); - if (CollectionUtils.isEmpty(bizResourceQuotas)){ + List bizResourceQuotas = list(baseResourceBo.getNamespace()); + if (CollectionUtils.isEmpty(bizResourceQuotas)) { return LimitsOfResourcesEnum.ADEQUATE; } - for (BizResourceQuota bizResourceQuota : bizResourceQuotas){ - if (!CollectionUtils.isEmpty(bizResourceQuota.getMatchExpressions())){ + for (BizResourceQuota bizResourceQuota : bizResourceQuotas) { + if (!CollectionUtils.isEmpty(bizResourceQuota.getMatchExpressions())) { continue; } Map remainder = bizResourceQuota.getRemainder(); BizQuantity cpuRemainder = remainder.get(K8sParamConstants.RESOURCE_QUOTA_CPU_LIMITS_KEY); - if (cpuRemainder != null && cpuNum != null){ - if (UnitConvertUtils.cpuFormatToN(cpuRemainder.getAmount(),cpuRemainder.getFormat()) < cpuNum * MagicNumConstant.MILLION_LONG){ + if (cpuRemainder != null && baseResourceBo.getCpuNum() != null) { + if (UnitConvertUtils.cpuFormatToN(cpuRemainder.getAmount(), cpuRemainder.getFormat()) < baseResourceBo.getCpuNum() * MagicNumConstant.MILLION_LONG) { + LogUtil.info(LogEnum.BIZ_K8S, "reachLimitsOfResources "+LimitsOfResourcesEnum.LIMITS_OF_CPU.getMessage()+" baseResourceBo={}", baseResourceBo); return LimitsOfResourcesEnum.LIMITS_OF_CPU; } } BizQuantity memRemainder = remainder.get(K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY); - if (memRemainder != null && memNum != null){ - if (UnitConvertUtils.memFormatToMi(memRemainder.getAmount(),memRemainder.getFormat()) < memNum){ + if (memRemainder != null && baseResourceBo.getMemNum() != null) { + if (UnitConvertUtils.memFormatToMi(memRemainder.getAmount(), memRemainder.getFormat()) < baseResourceBo.getMemNum()) { + LogUtil.info(LogEnum.BIZ_K8S, "reachLimitsOfResources "+LimitsOfResourcesEnum.LIMITS_OF_MEM.getMessage()+" baseResourceBo={}", baseResourceBo); return LimitsOfResourcesEnum.LIMITS_OF_MEM; } } - BizQuantity gpuRemainder = 
remainder.get(K8sParamConstants.RESOURCE_QUOTA_GPU_LIMITS_KEY); - if (gpuRemainder != null && gpuNum != null){ - if (Integer.valueOf(gpuRemainder.getAmount()) < gpuNum){ + BizQuantity gpuRemainder = remainder.get(K8sParamConstants.K8S_LABEL_KEY_PREFIX + baseResourceBo.getK8sLabelKey()); + if (gpuRemainder != null && baseResourceBo.getGpuNum() != null) { + if (Integer.valueOf(gpuRemainder.getAmount()) < baseResourceBo.getGpuNum()) { + LogUtil.info(LogEnum.BIZ_K8S, "reachLimitsOfResources "+LimitsOfResourcesEnum.LIMITS_OF_GPU.getMessage()+" baseResourceBo={}", baseResourceBo); return LimitsOfResourcesEnum.LIMITS_OF_GPU; } } @@ -233,4 +236,27 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { return LimitsOfResourcesEnum.ADEQUATE; } + + /** + * 判断资源是否达到限制 + * + * @param namespace 命名空间 + * @param cpuNum cpu限制 单位核 0表示不限制 + * @param memNum 内存限制 单位G 0表示不限制 + * @param gpuNum gpu限制 + * @param k8sLabelKey k8s GPU资源标签key值(例如:nvidia.com/gpu) + * @return LimitsOfResourcesEnum 资源超限枚举类 + */ + @Override + public LimitsOfResourcesEnum reachLimitsOfResourcesConvert(String namespace, Integer cpuNum, Integer memNum, Integer gpuNum, String k8sLabelKey) { + BaseResourceBo baseResourceBo = new BaseResourceBo(); + baseResourceBo.setCpuNum(cpuNum); + baseResourceBo.setMemNum(memNum); + baseResourceBo.setGpuNum(gpuNum); + baseResourceBo.setNamespace(namespace); + if (gpuNum > MagicNumConstant.ZERO) { + baseResourceBo.setK8sLabelKey(k8sLabelKey).setUseGpu(true); + } + return reachLimitsOfResources(baseResourceBo); + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ServiceApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ServiceApiImpl.java new file mode 100644 index 0000000..fd5ffa0 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ServiceApiImpl.java @@ -0,0 +1,72 @@ +package org.dubhe.k8s.api.impl; + +import cn.hutool.core.collection.CollectionUtil; +import 
io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceList; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.DeploymentList; +import io.fabric8.kubernetes.client.KubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClientException; +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.k8s.api.ServiceApi; +import org.dubhe.k8s.domain.resource.BizService; +import org.dubhe.k8s.domain.vo.TerminalResourceVO; +import org.dubhe.k8s.utils.BizConvertUtils; +import org.dubhe.k8s.utils.K8sUtils; +import org.dubhe.k8s.utils.LabelUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * @description k8s Service接口实现 + * @date 2021-10-27 + */ +public class ServiceApiImpl implements ServiceApi { + + private K8sUtils k8sUtils; + private KubernetesClient client; + + + public ServiceApiImpl(K8sUtils k8sUtils) { + this.k8sUtils = k8sUtils; + this.client = k8sUtils.getClient(); + } + + /** + * 查询命名空间下所有service + * + * @param namespace 命名空间 + * @return List Service业务类集合 + */ + @Override + public List getWithNameSpace(String namespace) { + try { + List BizServiceList =new ArrayList<>(); + ServiceList svcList = client.services().inNamespace(namespace).list(); + if(CollectionUtil.isEmpty(svcList.getItems())){ + return BizServiceList; + } + BizServiceList = BizConvertUtils.toBizServiceList(svcList.getItems()); + LogUtil.info(LogEnum.BIZ_K8S,"Output {}", BizServiceList); + return BizServiceList; + } catch (KubernetesClientException e) { + LogUtil.error(LogEnum.BIZ_K8S, "getWithNameSpace error:", e); + return Collections.EMPTY_LIST; + } + } + + /** + * 根据resourceName 获取service对应k8s中labels + * + * @param resourceName 资源名称 + * @return Map map + */ + @Override + public Map getLabels(String resourceName){ + return LabelUtils.withEnvResourceName(resourceName); + } +} diff --git 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java index 01c5c8d..8f7e460 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java @@ -20,6 +20,7 @@ package org.dubhe.k8s.api.impl; import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.StrUtil; +import com.alibaba.fastjson.JSON; import io.fabric8.kubernetes.api.model.Quantity; import io.fabric8.kubernetes.api.model.Service; import io.fabric8.kubernetes.api.model.ServiceList; @@ -32,31 +33,18 @@ import org.dubhe.biz.base.constant.SymbolConstant; import org.dubhe.biz.file.api.FileStoreApi; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; -import org.dubhe.k8s.api.NodeApi; -import org.dubhe.k8s.api.PersistentVolumeClaimApi; -import org.dubhe.k8s.api.PodApi; -import org.dubhe.k8s.api.ResourceIisolationApi; -import org.dubhe.k8s.api.ResourceQuotaApi; -import org.dubhe.k8s.api.TerminalApi; -import org.dubhe.k8s.api.VolumeApi; +import org.dubhe.k8s.api.*; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.bo.BaseResourceBo; import org.dubhe.k8s.domain.bo.BuildFsVolumeBO; import org.dubhe.k8s.domain.bo.BuildServiceBO; import org.dubhe.k8s.domain.bo.TerminalBO; -import org.dubhe.k8s.domain.vo.PtJupyterDeployVO; import org.dubhe.k8s.domain.vo.TerminalResourceVO; import org.dubhe.k8s.domain.vo.VolumeVO; -import org.dubhe.k8s.enums.K8sKindEnum; -import org.dubhe.k8s.enums.K8sResponseEnum; -import org.dubhe.k8s.enums.LackOfResourcesEnum; -import org.dubhe.k8s.enums.LimitsOfResourcesEnum; -import org.dubhe.k8s.enums.ServiceTypeENum; -import org.dubhe.k8s.utils.BizConvertUtils; -import org.dubhe.k8s.utils.K8sUtils; -import 
org.dubhe.k8s.utils.LabelUtils; -import org.dubhe.k8s.utils.ResourceBuildUtils; -import org.dubhe.k8s.utils.YamlUtils; +import org.dubhe.k8s.enums.*; +import org.dubhe.k8s.utils.*; +import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.util.CollectionUtils; @@ -85,6 +73,9 @@ public class TerminalApiImpl implements TerminalApi { @Autowired private ResourceIisolationApi resourceIisolationApi; + @Autowired + private K8sCommonUtils k8sCommonUtils; + public TerminalApiImpl(K8sUtils k8sUtils) { this.k8sUtils = k8sUtils; this.client = k8sUtils.getClient(); @@ -99,14 +90,19 @@ public class TerminalApiImpl implements TerminalApi { @Override public TerminalResourceVO create(TerminalBO bo) { try { - LogUtil.info(LogEnum.BIZ_K8S, "Params of creating TerminalApiImpl--create:{}", bo); + LogUtil.info(LogEnum.BIZ_K8S, "Params of creating TerminalApiImpl--create:{}", JSON.toJSONString(bo)); + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, baseResourceBo); + baseResourceBo.setName(bo.getResourceName()); //资源配额校验 - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { + LogUtil.info(LogEnum.BIZ_K8S, "TerminalApiImpl--limitsOfResources:{}", limitsOfResources.getMessage()); return new TerminalResourceVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } - LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + LackOfResourcesEnum lack = nodeApi.isAllocatable(baseResourceBo); if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) { + LogUtil.info(LogEnum.BIZ_K8S, "TerminalApiImpl--lack:{}", lack.getMessage()); return new 
TerminalResourceVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } if (!fileStoreApi.createDirs(bo.getDirList().toArray(new String[MagicNumConstant.ZERO]))) { @@ -120,8 +116,8 @@ public class TerminalApiImpl implements TerminalApi { } //共享存储 - Integer ShmMemAmount = bo.getMemNum() == null?MagicNumConstant.BINARY_TEN_EXP:bo.getMemNum()/MagicNumConstant.TWO; - volumeVO.addShmFsVolume(new Quantity(String.valueOf(ShmMemAmount),K8sParamConstants.MEM_UNIT)); + Integer ShmMemAmount = bo.getMemNum() == null ? MagicNumConstant.BINARY_TEN_EXP : bo.getMemNum() / MagicNumConstant.TWO; + volumeVO.addShmFsVolume(new Quantity(String.valueOf(ShmMemAmount), K8sParamConstants.MEM_UNIT)); //名称生成 String deploymentName = StrUtil.format(K8sParamConstants.RESOURCE_NAME_TEMPLATE, bo.getResourceName(), RandomUtil.randomString(MagicNumConstant.EIGHT)); @@ -132,6 +128,8 @@ public class TerminalApiImpl implements TerminalApi { Map podLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(), bo.getTaskIdentifyLabel()); //部署deployment + bo.setImagePullPolicy(ImagePullPolicyEnum.ALWAYS.getPolicy()); + bo.setCustomResourcesLimitsMap(k8sCommonUtils.getRdmaResource()); Deployment deployment = ResourceBuildUtils.buildDeployment(bo, volumeVO, deploymentName); LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}, yaml信息为{}", deploymentName, YamlUtils.dumpAsYaml(deployment)); resourceIisolationApi.addIisolationInfo(deployment); @@ -139,16 +137,16 @@ public class TerminalApiImpl implements TerminalApi { //部署service BuildServiceBO buildServiceBO = new BuildServiceBO(bo.getNamespace(), svcName, baseLabels, podLabels, ServiceTypeENum.NODE_PORT.getType()); - if (!CollectionUtils.isEmpty(bo.getPorts())){ + if (!CollectionUtils.isEmpty(bo.getPorts())) { bo.getPorts().forEach(port -> { - buildServiceBO.addPort(ResourceBuildUtils.buildServicePort(port, port, SymbolConstant.PORT+SymbolConstant.HYPHEN+port)); + 
buildServiceBO.addPort(ResourceBuildUtils.buildServicePort(port, port, SymbolConstant.PORT + SymbolConstant.HYPHEN + port)); }); } Service service = ResourceBuildUtils.buildService(buildServiceBO); LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}, yaml信息为{}", svcName, YamlUtils.dumpAsYaml(service)); Service serviceResult = client.services().create(service); - return new TerminalResourceVO(BizConvertUtils.toBizDeployment(deploymentResult),BizConvertUtils.toBizService(serviceResult)); - }catch (KubernetesClientException e) { + return new TerminalResourceVO(BizConvertUtils.toBizDeployment(deploymentResult), BizConvertUtils.toBizService(serviceResult)); + } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "TerminalApiImpl.create error, param:{} error:", bo, e); return new TerminalResourceVO().error(String.valueOf(e.getCode()), e.getMessage()); } @@ -163,12 +161,12 @@ public class TerminalApiImpl implements TerminalApi { @Override public PtBaseResult delete(String namespace, String resourceName) { try { - LogUtil.info(LogEnum.BIZ_K8S, "delete Terminal namespace:{} resourceName:{}",namespace,resourceName); + LogUtil.info(LogEnum.BIZ_K8S, "delete Terminal namespace:{} resourceName:{}", namespace, resourceName); DeploymentList deploymentList = client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list(); - if (deploymentList == null || deploymentList.getItems().size() == 0){ + if (deploymentList == null || deploymentList.getItems().size() == 0) { return new PtBaseResult(); } - persistentVolumeClaimApi.delete(namespace,resourceName); + persistentVolumeClaimApi.deletePvcByResourceName(namespace, resourceName); persistentVolumeClaimApi.deletePvByResourceName(resourceName); Boolean res = client.services().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete() && 
client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete(); diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java index 8e61e87..b6fc4ef 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java @@ -22,15 +22,7 @@ import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSON; import com.google.common.collect.Maps; -import io.fabric8.kubernetes.api.model.Container; -import io.fabric8.kubernetes.api.model.EnvVar; -import io.fabric8.kubernetes.api.model.EnvVarBuilder; -import io.fabric8.kubernetes.api.model.Quantity; -import io.fabric8.kubernetes.api.model.ResourceRequirementsBuilder; -import io.fabric8.kubernetes.api.model.Volume; -import io.fabric8.kubernetes.api.model.VolumeBuilder; -import io.fabric8.kubernetes.api.model.VolumeMount; -import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.fabric8.kubernetes.api.model.*; import io.fabric8.kubernetes.api.model.batch.Job; import io.fabric8.kubernetes.api.model.batch.JobBuilder; import io.fabric8.kubernetes.api.model.batch.JobList; @@ -42,45 +34,25 @@ import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.biz.file.api.FileStoreApi; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; -import org.dubhe.k8s.api.LogMonitoringApi; -import org.dubhe.k8s.api.NodeApi; -import org.dubhe.k8s.api.PersistentVolumeClaimApi; -import org.dubhe.k8s.api.PodApi; -import org.dubhe.k8s.api.ResourceIisolationApi; -import org.dubhe.k8s.api.ResourceQuotaApi; -import org.dubhe.k8s.api.TrainJobApi; +import org.dubhe.k8s.api.*; import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.constant.K8sLabelConstants; import 
org.dubhe.k8s.constant.K8sParamConstants; -import org.dubhe.k8s.domain.bo.PtJupyterJobBO; -import org.dubhe.k8s.domain.bo.PtMountDirBO; -import org.dubhe.k8s.domain.bo.PtPersistentVolumeClaimBO; -import org.dubhe.k8s.domain.bo.TaskYamlBO; +import org.dubhe.k8s.domain.bo.*; import org.dubhe.k8s.domain.entity.K8sTask; import org.dubhe.k8s.domain.resource.BizJob; import org.dubhe.k8s.domain.resource.BizPersistentVolumeClaim; import org.dubhe.k8s.domain.vo.PtJupyterJobVO; -import org.dubhe.k8s.enums.ImagePullPolicyEnum; -import org.dubhe.k8s.enums.K8sKindEnum; -import org.dubhe.k8s.enums.K8sResponseEnum; -import org.dubhe.k8s.enums.LackOfResourcesEnum; -import org.dubhe.k8s.enums.LimitsOfResourcesEnum; -import org.dubhe.k8s.enums.RestartPolicyEnum; -import org.dubhe.k8s.enums.ShellCommandEnum; +import org.dubhe.k8s.enums.*; +import org.dubhe.k8s.service.K8sGpuConfigService; import org.dubhe.k8s.service.K8sTaskService; -import org.dubhe.k8s.utils.BizConvertUtils; -import org.dubhe.k8s.utils.K8sUtils; -import org.dubhe.k8s.utils.LabelUtils; +import org.dubhe.k8s.utils.*; +import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import javax.annotation.Resource; import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import java.util.stream.Collectors; /** @@ -90,6 +62,7 @@ import java.util.stream.Collectors; public class TrainJobApiImpl implements TrainJobApi { private K8sUtils k8sUtils; + private KubernetesClient client; @Resource(name = "hostFileStoreApiImpl") @@ -99,10 +72,7 @@ public class TrainJobApiImpl implements TrainJobApi { private PersistentVolumeClaimApi persistentVolumeClaimApi; @Autowired private NodeApi nodeApi; - @Autowired - private PodApi podApi; - @Autowired - private LogMonitoringApi logMonitoringApi; + @Autowired private K8sTaskService 
k8sTaskService; @Autowired @@ -111,6 +81,11 @@ public class TrainJobApiImpl implements TrainJobApi { private ResourceQuotaApi resourceQuotaApi; @Autowired private ResourceIisolationApi resourceIisolationApi; + @Autowired + private K8sGpuConfigService k8sGpuConfigService; + + @Autowired + private K8sCommonUtils k8sCommonUtils; public TrainJobApiImpl(K8sUtils k8sUtils) { this.k8sUtils = k8sUtils; @@ -124,15 +99,24 @@ public class TrainJobApiImpl implements TrainJobApi { * @return PtJupyterJobVO 训练任务 Job 结果类 */ @Override - public PtJupyterJobVO create(PtJupyterJobBO bo) { - try{ - LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(),bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); - if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)){ + public PtJupyterJobVO create(PtJupyterJobBO bo) { + try { + BaseResourceBo baseResourceBo = new BaseResourceBo(); + BeanUtils.copyProperties(bo, baseResourceBo); + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(baseResourceBo); + + if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { return new PtJupyterJobVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); } - LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(),bo.getMemNum(),bo.getGpuNum()); - if (!LackOfResourcesEnum.ADEQUATE.equals(lack)){ - return new PtJupyterJobVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(),lack.getMessage()); + if (bo.getUseGpu()) { + Integer k8sGpuNumLimit = k8sGpuConfigService.getGpuLimit(bo.getNamespace(), bo.getGpuModel(), bo.getK8sLabelKey()); + if(bo.getGpuNum() > k8sGpuNumLimit){ + return new PtJupyterJobVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), LimitsOfResourcesEnum.LIMITS_OF_GPU.getMessage()); + } + } + LackOfResourcesEnum lack = nodeApi.isAllocatable(baseResourceBo); + if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) { + return new 
PtJupyterJobVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } LogUtil.info(LogEnum.BIZ_K8S, "Params of creating Job--create:{}", bo); if (!fileStoreApi.createDirs(bo.getDirList().toArray(new String[MagicNumConstant.ZERO]))) { @@ -215,6 +199,7 @@ public class TrainJobApiImpl implements TrainJobApi { private String namespace; private String image; private Boolean useGpu; + private String gpuModel; private List cmdLines; private Map fsMounts; @@ -237,6 +222,7 @@ public class TrainJobApiImpl implements TrainJobApi { this.namespace = bo.getNamespace(); this.image = bo.getImage(); this.cmdLines = new ArrayList(); + this.gpuModel = bo.getGpuModel(); Optional.ofNullable(bo.getCmdLines()).ifPresent(v -> cmdLines = v); this.useGpu = bo.getUseGpu()==null?false:bo.getUseGpu(); if (bo.getUseGpu() != null && bo.getUseGpu() && null == bo.getGpuNum()){ @@ -245,13 +231,14 @@ public class TrainJobApiImpl implements TrainJobApi { this.resourcesLimitsMap = Maps.newHashMap(); Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); - Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); + Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(bo.getK8sLabelKey(), new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); + k8sCommonUtils.addRdmaResource(resourcesLimitsMap); this.fsMounts = bo.getFsMounts(); businessLabel = bo.getBusinessLabel(); + this.baseLabels = LabelUtils.getBaseLabels(baseName,bo.getBusinessLabel(),bo.getExtraLabelMap()); this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); - this.baseLabels = LabelUtils.getBaseLabels(baseName,bo.getBusinessLabel()); this.volumeMounts = new 
ArrayList<>(); this.volumes = new ArrayList<>(); @@ -429,7 +416,7 @@ public class TrainJobApiImpl implements TrainJobApi { //镜像 container.setName(jobName); container.setImage(image); - container.setImagePullPolicy(ImagePullPolicyEnum.IFNOTPRESENT.getPolicy()); + container.setImagePullPolicy(ImagePullPolicyEnum.ALWAYS.getPolicy()); container.setVolumeMounts(volumeMounts); //启动命令 container.setCommand(Collections.singletonList(ShellCommandEnum.BIN_BANSH.getShell())); @@ -440,9 +427,10 @@ public class TrainJobApiImpl implements TrainJobApi { .addToLimits(resourcesLimitsMap) .build()); - Map gpuLabel = new HashMap(1); - if (useGpu){ - gpuLabel.put(K8sLabelConstants.NODE_GPU_LABEL_KEY,K8sLabelConstants.NODE_GPU_LABEL_VALUE); + Map gpuLabel = new HashMap(2); + if (useGpu) { + gpuLabel.put(K8sLabelConstants.NODE_GPU_LABEL_KEY, K8sLabelConstants.NODE_GPU_LABEL_VALUE); + gpuLabel.put(K8sLabelConstants.NODE_GPU_MODEL_LABEL_KEY, gpuModel); } job = new JobBuilder() @@ -458,7 +446,7 @@ public class TrainJobApiImpl implements TrainJobApi { .withNewTemplate() .withNewMetadata() .withName(jobName) - .addToLabels(LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(),businessLabel, taskIdentifyLabel)) + .addToLabels(LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(),businessLabel,taskIdentifyLabel,baseLabels)) .withNamespace(namespace) .endMetadata() .withNewSpec() @@ -477,10 +465,10 @@ public class TrainJobApiImpl implements TrainJobApi { job = client.batch().jobs().create(job); LogUtil.info(LogEnum.BIZ_K8S, "{} deployed successfully", jobName); } - if (delayCreate > MagicNumConstant.ZERO || delayDelete > MagicNumConstant.ZERO){ - taskYamlBO.append(job); + if (delayCreate > MagicNumConstant.ZERO || delayDelete > MagicNumConstant.ZERO) { + taskYamlBO.append(job); } - + LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}, yaml info is : {}", jobName, YamlUtils.dumpAsYaml(job)); return job; } } diff --git 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java index 7963b29..c5a4995 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java @@ -263,7 +263,7 @@ public class ResourceCache { return redisUtils.hmset(taskIdentify, new HashMap(){{ put(StringConstant.CACHE_TASK_ID, taskId); put(StringConstant.CACHE_TASK_NAME, taskName); - }}, NumberConstant.MONTH_SECOND) && redisUtils.set(taskIdPrefix + String.valueOf(taskId), taskIdentify, NumberConstant.MONTH_SECOND); + }}) && redisUtils.set(taskIdPrefix + String.valueOf(taskId), taskIdentify); } /** @@ -281,8 +281,8 @@ public class ResourceCache { redisUtils.hmset(taskIdentify, new HashMap(){{ put(StringConstant.CACHE_TASK_ID, taskId); put(StringConstant.CACHE_TASK_NAME, taskName); - }}, NumberConstant.MONTH_SECOND); - redisUtils.set(taskIdPrefix + taskId, taskIdentify, NumberConstant.MONTH_SECOND); + }}); + redisUtils.set(taskIdPrefix + taskId, taskIdentify); } return taskIdentify; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java index 5ca236a..0f677a0 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java @@ -37,6 +37,7 @@ import org.dubhe.k8s.api.NodeApi; import org.dubhe.k8s.api.PersistentVolumeClaimApi; import org.dubhe.k8s.api.PodApi; import org.dubhe.k8s.api.ResourceQuotaApi; +import org.dubhe.k8s.api.ServiceApi; import org.dubhe.k8s.api.TerminalApi; import org.dubhe.k8s.api.TrainJobApi; import org.dubhe.k8s.api.impl.DistributeTrainApiImpl; @@ -53,6 +54,7 @@ import org.dubhe.k8s.api.impl.NodeApiImpl; import org.dubhe.k8s.api.impl.PersistentVolumeClaimApiImpl; import 
org.dubhe.k8s.api.impl.PodApiImpl; import org.dubhe.k8s.api.impl.ResourceQuotaApiImpl; +import org.dubhe.k8s.api.impl.ServiceApiImpl; import org.dubhe.k8s.api.impl.TerminalApiImpl; import org.dubhe.k8s.api.impl.TrainJobApiImpl; import org.dubhe.k8s.cache.ResourceCache; @@ -184,11 +186,11 @@ public class K8sConfig { } @Bean - public RestHighLevelClient restHighLevelClient(){ + public RestHighLevelClient restHighLevelClient() { String[] hosts = hostlist.split(COMMA); HttpHost[] httpHostArray = new HttpHost[hosts.length]; - for(int i=ZERO;i { +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sGpuConfigMapper.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sGpuConfigMapper.java new file mode 100644 index 0000000..8f1297a --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sGpuConfigMapper.java @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.Select; +import org.dubhe.k8s.domain.entity.K8sGpuConfig; + +import java.util.List; +import java.util.Set; + +/** + * @description Mapper for k8s GPU configuration records (queries the k8s_gpu_config table) + * @date 2021-9-2 + */ +public interface K8sGpuConfigMapper extends BaseMapper { + + /** + * Batch-inserts user GPU configuration records + * + * @param userGpuConfigs collection of user GPU configuration entities to insert + */ + void insertBatchs(List userGpuConfigs); + + /** + * Counts the GPU configuration records stored for a namespace + * @param namespace namespace to match (a namespace string, not a user id) + * @return Integer number of GPU configuration records found for that namespace + */ + @Select("select count(*) from k8s_gpu_config where namespace= #{namespace}") + Integer selectCountByNamespace(@Param("namespace") String namespace); + +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sNodeMapper.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sNodeMapper.java new file mode 100644 index 0000000..cd980b9 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sNodeMapper.java @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ + +package org.dubhe.k8s.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.dubhe.k8s.domain.entity.K8sNode; + +/** + * @description Mapper interface for k8s cluster node information; declares no methods of its own beyond those inherited from BaseMapper + * @date 2020-09-16 + */ +public interface K8sNodeMapper extends BaseMapper { +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sTaskIdentifyMapper.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sTaskIdentifyMapper.java new file mode 100644 index 0000000..01f5f85 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/dao/K8sTaskIdentifyMapper.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.k8s.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.Select; +import org.apache.ibatis.annotations.Update; +import org.dubhe.k8s.domain.entity.K8sTaskIdentify; + +/** + * @description Mapper for the k8s_task_identify table: deleteByTaskId marks rows as deleted (sets deleted = 1) by task id, getInfoByTaskId selects the row with deleted = 0 for a task id, updateNameByTaskId changes task_name by task id (no deleted filter in that statement) + * @date 2021-10-26 + */ +public interface K8sTaskIdentifyMapper extends BaseMapper { + + @Update("update k8s_task_identify set deleted = 1 where task_id = #{id}") + int deleteByTaskId(@Param("id") Long id); + + @Select("select * from k8s_task_identify where task_id= #{taskId} and deleted =0") + K8sTaskIdentify getInfoByTaskId(@Param("taskId") Long taskId); + + @Update("update k8s_task_identify set task_name=#{taskName} where task_id=#{taskId}") + int updateNameByTaskId(@Param("taskId") Long taskId, + @Param("taskName") String taskName); + +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BaseResourceBo.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BaseResourceBo.java new file mode 100644 index 0000000..f1375f7 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BaseResourceBo.java @@ -0,0 +1,90 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.k8s.domain.bo; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.k8s.annotation.K8sValidation; +import org.dubhe.k8s.enums.ValidationTypeEnum; + +/** + * @description Base class holding the attributes common to resource BOs + * @date 2021-08-19 + */ +@Data +@Accessors(chain = true) +public class BaseResourceBo { + + /** + * Namespace + **/ + @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) + private String namespace; + /** + * Resource name + **/ + @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) + private String name; + + /** + * Memory amount, unit Mi + **/ + private Integer memNum; + + /** + * CPU amount; 1000 means one full core + **/ + private Integer cpuNum; + + /** + * GPU count; 0 means a shared GPU, null means no GPU is used + **/ + private Integer gpuNum; + + /** Whether to use a GPU; true: use, false: do not use **/ + private Boolean useGpu; + + /** + * GPU type (e.g. NVIDIA) + */ + private String gpuType; + + /** + * GPU model (e.g. v100) + */ + private String gpuModel; + + /** + * k8s GPU resource label key (e.g. nvidia.com/gpu) + */ + private String k8sLabelKey; + + /** + * Image name + **/ + private String image; + + /** + * Business label, used to identify the business module + **/ + private String businessLabel; + + /** + * Task identity label, used to identify the task + **/ + private String taskIdentifyLabel; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java index 43a66f6..2a4487b 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java @@ -18,6 +18,7 @@ package org.dubhe.k8s.domain.bo; import cn.hutool.core.collection.CollectionUtil; +import io.fabric8.kubernetes.api.model.Quantity; import lombok.Data; import lombok.experimental.Accessors; import org.dubhe.biz.base.constant.MagicNumConstant; @@ -67,6 +68,22 @@ public class DeploymentBO { * CPU数量 **/ private Integer cpuNum; + /**是否使用gpu true:使用;false:不用**/ + private Boolean
useGpu; + /** + * GPU类型(例如:NVIDIA) + */ + private String gpuType; + + /** + * GPU型号(例如:v100) + */ + private String gpuModel; + + /** + * k8s GPU资源标签key值(例如:nvidia.com/gpu) + */ + private String k8sLabelKey; /** * 镜像名称 **/ @@ -94,6 +111,19 @@ public class DeploymentBO { private Set ports; /** + * 镜像拉取策略 + * IfNotPresent 默认值 + * Always + * Never + */ + private String imagePullPolicy; + + /** + * 自定义资源 + */ + private Map customResourcesLimitsMap; + + /** * 获取nfs路径 * @return */ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java index d8802cc..1cc84cd 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java @@ -21,9 +21,9 @@ import cn.hutool.core.collection.CollectionUtil; import lombok.Data; import lombok.experimental.Accessors; import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.k8s.annotation.K8sValidation; import org.dubhe.k8s.enums.ValidationTypeEnum; -import org.dubhe.biz.base.utils.StringUtils; import java.util.ArrayList; import java.util.HashMap; @@ -73,13 +73,21 @@ public class DistributeTrainBO { **/ private Integer gpuNum; /** + * GPU型号(例如:nvidia-v100) + */ + private String gpuModel; + /** + * k8s GPU资源标签key值(例如:nvidia.com/gpu) + */ + private String k8sLabelKey; + /** * slave机器运行时执行命令 **/ private String slaveCmd; /** * 运行环境变量 **/ - private Map env; + private Map env; /** * 业务标签,用于标识业务模块 **/ @@ -99,7 +107,7 @@ public class DistributeTrainBO { /** * 文件存储服务挂载 key:pod内挂载路径 value:文件存储路径及配置 **/ - private Map fsMounts; + private Map fsMounts; /** * 设置文件存储挂载 @@ -107,12 +115,12 @@ public class DistributeTrainBO { * @param dir 文件存储服务路径 * @return */ - public DistributeTrainBO putFsMounts(String mountPath,String dir){ - if 
(StringUtils.isNotEmpty(mountPath) && StringUtils.isNotEmpty(dir)){ - if (fsMounts == null){ + public DistributeTrainBO putFsMounts(String mountPath, String dir) { + if (StringUtils.isNotEmpty(mountPath) && StringUtils.isNotEmpty(dir)) { + if (fsMounts == null) { fsMounts = new HashMap<>(MagicNumConstant.EIGHT); } - fsMounts.put(mountPath,new PtMountDirBO(dir)); + fsMounts.put(mountPath, new PtMountDirBO(dir)); } return this; } @@ -123,12 +131,12 @@ public class DistributeTrainBO { * @param dir 文件存储服务路径及配置 * @return */ - public DistributeTrainBO putFsMounts(String mountPath,PtMountDirBO dir){ - if (StringUtils.isNotEmpty(mountPath) && dir != null){ - if (fsMounts == null){ + public DistributeTrainBO putFsMounts(String mountPath, PtMountDirBO dir) { + if (StringUtils.isNotEmpty(mountPath) && dir != null) { + if (fsMounts == null) { fsMounts = new HashMap<>(MagicNumConstant.EIGHT); } - fsMounts.put(mountPath,dir); + fsMounts.put(mountPath, dir); } return this; } @@ -137,8 +145,8 @@ public class DistributeTrainBO { * 获取 文件存储服务路径列表 * @return */ - public List getDirList(){ - if (CollectionUtil.isNotEmpty(fsMounts)){ + public List getDirList() { + if (CollectionUtil.isNotEmpty(fsMounts)) { return fsMounts.values().stream().map(PtMountDirBO::getDir).collect(Collectors.toList()); } return new ArrayList<>(); diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/LogMonitoringBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/LogMonitoringBO.java index 832261a..3bef045 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/LogMonitoringBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/LogMonitoringBO.java @@ -17,6 +17,7 @@ package org.dubhe.k8s.domain.bo; import lombok.Data; import lombok.experimental.Accessors; +import org.dubhe.biz.base.enums.BizEnum; import org.dubhe.k8s.domain.dto.PodLogQueryDTO; import java.util.Set; @@ -56,6 +57,26 @@ public class LogMonitoringBO { **/ private Long 
endTimeMillis; + /** + * 日志查询起始行 + **/ + private Integer from; + + /** + * 日志查询行数 + **/ + private Integer size; + + /** + * 业务标签,用于标识一个组的业务模块 比如:TRAIN模块的trainId, TADL模块的experimentId + */ + private String businessGroupId; + + /** + * 业务标签,用于标识业务模块 + */ + private BizEnum business; + public LogMonitoringBO(String namespace,String podName){ this.namespace = namespace; this.podName = podName; diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java index b58e2f4..3a21866 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java @@ -65,6 +65,14 @@ public class ModelServingBO { **/ private Integer cpuNum; /** + * GPU型号(例如:nvidia-v100) + */ + private String gpuModel; + /** + * k8s GPU资源标签key值(例如:nvidia.com/gpu) + */ + private String k8sLabelKey; + /** * 镜像名称 **/ private String image; diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java index 1c5cb88..95904dc 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java @@ -17,10 +17,13 @@ package org.dubhe.k8s.domain.bo; +import cn.hutool.core.collection.CollUtil; import cn.hutool.core.util.NumberUtil; +import cn.hutool.core.util.StrUtil; import lombok.Data; import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.functional.StringFormat; +import org.dubhe.biz.base.vo.UserAllotVO; import org.dubhe.k8s.domain.vo.GpuTotalMemResultVO; import org.dubhe.k8s.domain.vo.MetricsDataResultValueVO; import org.springframework.util.CollectionUtils; @@ -203,6 +206,31 @@ public class PrometheusMetricBO { return 
list; } + public List getUsageRateResults() { + List list = new ArrayList<>(); + if (data == null || CollUtil.isEmpty(data.getResult())) { + return list; + } + data.getResult().forEach(metricResult -> { + UserAllotVO userAllotVO = new UserAllotVO(); + userAllotVO.setUserName(metricResult.getMetric().getNamespace()); + userAllotVO.setAllotTotal(metricResult.getValue().get(1).toString()); + list.add(userAllotVO); + }); + return list; + } + + public Map getResourceUsageResults() { + Map resMap = new HashMap<>(); + if (data == null || CollUtil.isEmpty(data.getResult())) { + return resMap; + } + data.getResult().forEach(metricResult -> { + resMap.put(Long.valueOf(metricResult.getMetric().getNamespace().replace("namespace-", StrUtil.EMPTY)), metricResult.getValue().get(1).toString()); + }); + return resMap; + } + } @Data @@ -222,5 +250,6 @@ class MetricResult { class Metric { private String acc_id; private String pod; + private String namespace; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PromethusNodeMetricsBo.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PromethusNodeMetricsBo.java new file mode 100644 index 0000000..9c55589 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PromethusNodeMetricsBo.java @@ -0,0 +1,80 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.domain.bo; + +import cn.hutool.core.collection.CollUtil; +import lombok.Data; +import org.dubhe.k8s.domain.vo.GpuUsageVO; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * @description + * @date 2021-11-11 + */ +@Data +public class PromethusNodeMetricsBo { + + private String status; + + private MetricNodeData data; + + public Map> getGpuUsageResults() { + Map> gpuUsageMap = new HashMap<>(); + + if (data == null || CollUtil.isEmpty(data.getResult())) { + return gpuUsageMap; + } + for (MetricNodeResult result : data.getResult()) { + List gpuUsageList = new ArrayList<>(); + GpuUsageVO gpuUsageVO = new GpuUsageVO(result.getMetric().getGpu(), result.getValue().get(1).toString()); + String nodeName = result.getMetric().getHostname(); + gpuUsageList.add(gpuUsageVO); + if (gpuUsageMap.containsKey(nodeName)) { + gpuUsageList.addAll(gpuUsageMap.get(nodeName)); + } + gpuUsageMap.put(nodeName, gpuUsageList.stream().sorted(Comparator.comparing(GpuUsageVO::getAccId)).collect(Collectors.toList())); + } + return gpuUsageMap; + } +} + +@Data +class MetricNodeData { + private String resultType; + private List result; + +} + +@Data +class MetricNodeResult { + private MetricNode metric; + List value; + +} + +@Data +class MetricNode { + private String Hostname; + private String gpu; +} + diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java index 55e45af..f47d051 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java @@ -21,10 +21,8 @@ import cn.hutool.core.collection.CollectionUtil; import lombok.Data; import 
lombok.experimental.Accessors; import org.dubhe.biz.base.constant.MagicNumConstant; -import org.dubhe.k8s.annotation.K8sValidation; -import org.dubhe.k8s.enums.GraphicsCardTypeEnum; -import org.dubhe.k8s.enums.ValidationTypeEnum; import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.k8s.enums.GraphicsCardTypeEnum; import java.util.ArrayList; import java.util.HashMap; @@ -38,33 +36,21 @@ import java.util.stream.Collectors; */ @Data @Accessors(chain = true) -public class PtJupyterJobBO { - /**命名空间**/ - @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) - private String namespace; - /**资源名称**/ - @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) - private String name; - /**GPU数量,1代表使用一张显卡**/ - private Integer gpuNum; - /**是否使用gpu true:使用;false:不用**/ - private Boolean useGpu; - /**内存数量,单位Mi**/ - private Integer memNum; - /**cpu用量 单位:m 1个核心=1000m**/ - private Integer cpuNum; +public class PtJupyterJobBO extends BaseResourceBo { /**镜像名称**/ private String image; /**执行命令**/ private List cmdLines; /**文件存储服务挂载 key:pod内挂载路径 value:文件存储路径及配置**/ - private Map fsMounts; + private Map fsMounts; /**显卡类型**/ private GraphicsCardTypeEnum graphicsCardType; /**业务标签,用于标识业务模块**/ private String businessLabel; + /**额外扩展的标签**/ + private Map extraLabelMap; /**任务身份标签,用于标识任务身份**/ private String taskIdentifyLabel; /**延时创建时间,单位:分钟**/ @@ -77,29 +63,29 @@ public class PtJupyterJobBO { private String pipSitePackageMountPath; - public List getDirList(){ - if (CollectionUtil.isNotEmpty(fsMounts)){ + public List getDirList() { + if (CollectionUtil.isNotEmpty(fsMounts)) { return fsMounts.values().stream().map(PtMountDirBO::getDir).collect(Collectors.toList()); } return new ArrayList<>(); } - public PtJupyterJobBO putFsMounts(String mountPath,String dir){ - if (StringUtils.isNotEmpty(mountPath) && StringUtils.isNotEmpty(dir)){ - if (fsMounts == null){ + public PtJupyterJobBO putFsMounts(String mountPath, String dir) { + if (StringUtils.isNotEmpty(mountPath) && StringUtils.isNotEmpty(dir)) { 
+ if (fsMounts == null) { fsMounts = new HashMap<>(MagicNumConstant.TWO); } - fsMounts.put(mountPath,new PtMountDirBO(dir)); + fsMounts.put(mountPath, new PtMountDirBO(dir)); } return this; } - public PtJupyterJobBO putFsMounts(String mountPath,PtMountDirBO dir){ - if (StringUtils.isNotEmpty(mountPath) && dir != null){ - if (fsMounts == null){ + public PtJupyterJobBO putFsMounts(String mountPath, PtMountDirBO dir) { + if (StringUtils.isNotEmpty(mountPath) && dir != null) { + if (fsMounts == null) { fsMounts = new HashMap<>(MagicNumConstant.TWO); } - fsMounts.put(mountPath,dir); + fsMounts.put(mountPath, dir); } return this; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java index f070686..8cb64f3 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java @@ -56,6 +56,14 @@ public class PtJupyterResourceBO { **/ private Integer cpuNum; /** + * GPU型号(例如:v100) + */ + private String gpuModel; + /** + * k8s GPU资源标签key值(例如:nvidia.com/gpu) + */ + private String k8sLabelKey; + /** * 镜像名称 **/ private String image; @@ -105,12 +113,4 @@ public class PtJupyterResourceBO { * 定时删除时间,单位:分钟 **/ private Integer delayDeleteTime; - /** - * pip包路径 - */ - private String pipSitePackageDir; - /** - * k8s内pip包路径 - */ - private String pipSitePackageMountPath; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationDeploymentBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationDeploymentBO.java index 3321116..a575cee 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationDeploymentBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationDeploymentBO.java @@ -27,7 +27,7 
@@ import java.util.List; * @date 2020-05-26 */ @Data -public class PtModelOptimizationDeploymentBO extends PtDeploymentBO { +public class PtModelOptimizationDeploymentBO extends BaseResourceBo { /** * 挂载到dataset的数据集的路径 **/ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationJobBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationJobBO.java index 8eb4610..7b82288 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationJobBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtModelOptimizationJobBO.java @@ -33,7 +33,7 @@ import java.util.stream.Collectors; * @date 2020-05-31 */ @Data -public class PtModelOptimizationJobBO extends PtJobBO { +public class PtModelOptimizationJobBO extends BaseResourceBo { /** * 执行命令 **/ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java index 9358b41..16dc36b 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java @@ -17,13 +17,13 @@ package org.dubhe.k8s.domain.bo; +import lombok.Data; +import lombok.experimental.Accessors; import org.dubhe.biz.base.constant.SymbolConstant; import org.dubhe.k8s.annotation.K8sValidation; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.resource.BizQuantity; import org.dubhe.k8s.domain.resource.BizScopedResourceSelectorRequirement; -import lombok.Data; -import lombok.experimental.Accessors; import org.dubhe.k8s.enums.ValidationTypeEnum; import java.util.HashMap; @@ -49,11 +49,11 @@ public class PtResourceQuotaBO { * @param amount 值 * @param format 单位 */ - public void addCpuLimitsHard(String amount,String format){ - if (hard == null){ + public void addCpuLimitsHard(String 
amount, String format) { + if (hard == null) { hard = new HashMap<>(); } - hard.put(K8sParamConstants.RESOURCE_QUOTA_CPU_LIMITS_KEY,new BizQuantity(amount,format)); + hard.put(K8sParamConstants.RESOURCE_QUOTA_CPU_LIMITS_KEY, new BizQuantity(amount, format)); } /** @@ -61,21 +61,23 @@ public class PtResourceQuotaBO { * @param amount 值 * @param format 单位 */ - public void addMemoryLimitsHard(String amount,String format){ - if (hard == null){ + public void addMemoryLimitsHard(String amount, String format) { + if (hard == null) { hard = new HashMap<>(); } - hard.put(K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY,new BizQuantity(amount,format)); + hard.put(K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY, new BizQuantity(amount, format)); } /** * 添加gpu 限制 - * @param amount 值 + * @param map 值 */ - public void addGpuLimitsHard(String amount){ - if (hard == null){ + public void addGpuLimitsHard(Map map) { + if (hard == null) { hard = new HashMap<>(); } - hard.put(K8sParamConstants.RESOURCE_QUOTA_GPU_LIMITS_KEY,new BizQuantity(amount, SymbolConstant.BLANK)); + for (Map.Entry entry : map.entrySet()) { + hard.put(K8sParamConstants.K8S_LABEL_KEY_PREFIX + entry.getKey(), new BizQuantity(entry.getValue(), SymbolConstant.BLANK)); + } } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/BaseK8sPodCallbackCreateDTO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/BaseK8sPodCallbackCreateDTO.java index c283837..ac53965 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/BaseK8sPodCallbackCreateDTO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/BaseK8sPodCallbackCreateDTO.java @@ -22,6 +22,7 @@ import io.swagger.annotations.ApiModelProperty; import lombok.Data; import javax.validation.constraints.NotEmpty; +import java.util.Map; /** * @descripton 统一通用参数实现与校验 @@ -58,6 +59,9 @@ public class BaseK8sPodCallbackCreateDTO { @ApiModelProperty(value = "k8s pod containerStatuses state") 
private String messages; + @ApiModelProperty(value = "k8s pod lables") + private Map lables; + public BaseK8sPodCallbackCreateDTO(){ } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/K8sGpuConfigDTO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/K8sGpuConfigDTO.java new file mode 100644 index 0000000..078ad6a --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/K8sGpuConfigDTO.java @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.dto.SysUserGpuConfigDTO; + +import javax.validation.constraints.NotBlank; +import java.io.Serializable; +import java.util.List; + +/** + * @description 用户配置DTO + * @date 2021-09-06 + */ +@Data +@Accessors(chain = true) +public class K8sGpuConfigDTO implements Serializable { + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "命名空间", required = true) + @NotBlank(message = "命名空间不能为空") + private String namespace; + + /** + * GPU 资源限制 + */ + private List gpuResources; +} \ No newline at end of file diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/NodeInfoDTO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/NodeInfoDTO.java new file mode 100644 index 0000000..188faff --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/dto/NodeInfoDTO.java @@ -0,0 +1,39 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.k8s.domain.dto; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; + +/** + * @description k8s节点信息DTO + * @date 2021-09-17 + */ +@Data +@Api("k8s节点信息DTO") +public class NodeInfoDTO { + @ApiModelProperty(value = "id") + private Long id; + + @ApiModelProperty(value = "节点名") + private String name; + + @ApiModelProperty(value = "备注") + private String remark; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sCallbackEvent.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sCallbackEvent.java new file mode 100644 index 0000000..f150ab5 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sCallbackEvent.java @@ -0,0 +1,62 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.dubhe.biz.db.entity.BaseEntity; + +import java.util.Date; + +/** + * @description k8s callback event信息 + * @date 2021-11-15 + */ +@Data +@EqualsAndHashCode(callSuper = true) +@TableName("k8s_callback_event") +public class K8sCallbackEvent extends BaseEntity { + @TableId(value = "id", type = IdType.AUTO) + @ApiModelProperty(hidden = true) + private Long id; + + @TableField(value = "resource_name") + private String resourceName; + + @TableField(value = "event_type") + private String eventType; + + @TableField(value = "business_type") + private String businessType; + + @TableField(value = "message") + private String message; + + @TableField(value = "start_time") + private String startTime; + + @TableField(value = "finish_time") + private String finishTime; + + @TableField(value = "container_id") + private String containerId; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sGpuConfig.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sGpuConfig.java new file mode 100644 index 0000000..9b8f422 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sGpuConfig.java @@ -0,0 +1,70 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.k8s.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.db.entity.BaseEntity; + +import javax.validation.constraints.NotNull; + +/** + * @description GPU configuration entity mapped to table k8s_gpu_config + * @date 2021-08-20 + */ +@Data +@Accessors(chain = true) +@TableName("k8s_gpu_config") +public class K8sGpuConfig extends BaseEntity { + + /** + * Primary key ID + */ + @TableId(value = "id", type = IdType.AUTO) + @NotNull(groups = {Update.class}) + private Long id; + + @TableField(value = "namespace") + private String namespace; + /** + * GPU type (e.g. NVIDIA) + */ + @TableField(value = "gpu_type") + private String gpuType; + + /** + * GPU model (e.g. v100) + */ + @TableField(value = "gpu_model") + private String gpuModel; + + /** + * k8s GPU resource label key (e.g. nvidia.com/gpu) + */ + @TableField(value = "k8s_label_key") + private String k8sLabelKey; + + /** + * GPU limit configured for the user, in cards; an ordinary column, so it uses @TableField (id is the only @TableId) + */ + @TableField(value = "gpu_limit") + private Integer gpuLimit; +} \ No newline at end of file diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sNode.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sNode.java new file mode 100644 index 0000000..655b91c --- /dev/null +++ 
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sNode.java @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.k8s.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.dubhe.biz.db.entity.BaseEntity; + +/** + * @description k8s集群节点信息对象 + * @date 2021-09-16 + */ +@Data +@AllArgsConstructor +@NoArgsConstructor +@TableName("k8s_node") +public class K8sNode extends BaseEntity{ + @TableId(value = "id", type = IdType.AUTO) + @ApiModelProperty(hidden = true) + private Long id; + + @TableField(value = "name") + private String name; + + @TableField(value = "remark") + private String remark; + + public K8sNode(String name,String remark){ + this.name = name; + this.remark = remark; + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sTaskIdentify.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sTaskIdentify.java new file mode 100644 index 0000000..cce3268 --- /dev/null +++ 
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/entity/K8sTaskIdentify.java @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.k8s.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.dubhe.biz.db.entity.BaseEntity; + +/** + * @description + * @date 2021-10-26 + */ +@Data +@AllArgsConstructor +@NoArgsConstructor +@TableName("k8s_task_identify") +public class K8sTaskIdentify extends BaseEntity { + + @TableId(value = "id", type = IdType.AUTO) + @ApiModelProperty(hidden = true) + private Long id; + + @TableField(value = "task_id") + private Long taskId; + + @TableField(value = "task_name") + private String taskName; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerLastStateTerminated.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerLastStateTerminated.java new file mode 100644 index 0000000..5a03d50 --- /dev/null +++ 
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerLastStateTerminated.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.k8s.domain.resource; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.k8s.annotation.K8sField; + +/** + * @description: + * @date: 2021/11/18 + */ +@Data +@Accessors(chain = true) +public class BizContainerLastStateTerminated { + @K8sField("finishedAt") + private String finishedAt; + + @K8sField("reason") + private String reason; + + @K8sField("startedAt") + private String startedAt; + + @K8sField("exitCode") + private Integer exitCode; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java index afbbd73..61697b8 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java @@ -33,6 +33,10 @@ public class BizContainerStatus { */ @K8sField("state:terminated") private BizContainerStateTerminated terminated; + + @K8sField("lastState:terminated") + private BizContainerLastStateTerminated lastStateTerminated; + /** * Details 
about a waiting container */ @@ -41,4 +45,7 @@ public class BizContainerStatus { @K8sField("containerID") private String containerID; + + @K8sField("restartCount") + private Integer restartCount; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java index 9b55ab7..d74e0ec 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java @@ -17,12 +17,14 @@ package org.dubhe.k8s.domain.resource; +import com.google.common.collect.Maps; import lombok.Data; import lombok.experimental.Accessors; import org.dubhe.k8s.annotation.K8sField; import org.springframework.util.CollectionUtils; import java.util.List; +import java.util.Map; /** * @description Kubernetes Service @@ -43,6 +45,9 @@ public class BizService { @K8sField("spec:ports") private List ports; + @K8sField("metadata:labels") + private Map labels = Maps.newHashMap(); + public BizServicePort getServicePortByTargetPort(Integer targetPort){ if (CollectionUtils.isEmpty(ports) || targetPort == null){ return null; @@ -54,4 +59,14 @@ public class BizService { } return null; } + + /** + * 根据键获取label + * + * @param labelKey + * @return + */ + public String getLabel(String labelKey) { + return labels.get(labelKey); + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuUsageVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuUsageVO.java index 2f3a2d7..f8c57fa 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuUsageVO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuUsageVO.java @@ -32,7 +32,7 @@ public class GpuUsageVO { */ private String accId; /** - * 使用率 百分比 + * 使用率 */ - Float usage; + private String usage; } diff --git 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sEventVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sEventVO.java new file mode 100644 index 0000000..8613c3c --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sEventVO.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.k8s.domain.vo; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * @description: k8s event VO + * @date: 2021/11/17 + */ +@Data +@AllArgsConstructor +@NoArgsConstructor +public class K8sEventVO { + /** + * Resource name + */ + private String resourceName; + + /** + * Event type + */ + private String type; + + /** + * Event message + */ + private String message; + + /** + * Time the event occurred (held as a String) + */ + private String startTime; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sResourceEventResultVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sResourceEventResultVO.java new file mode 100644 index 0000000..8d16fdc --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/K8sResourceEventResultVO.java @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.k8s.domain.vo; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; + +import java.util.List; + +/** + * @description: + * @date: 2021/11/17 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Accessors(chain = true) +public class K8sResourceEventResultVO { + /** + * k8s资源名称 + */ + private String resourceName; + + /** + * k8s事件列表 + */ + private List eventVOList; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/LogMonitoringVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/LogMonitoringVO.java index 68ff8a4..88502ed 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/LogMonitoringVO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/LogMonitoringVO.java @@ -30,7 +30,7 @@ import java.util.List; @Data @AllArgsConstructor public class LogMonitoringVO extends PtBaseResult { - private Long totalLogs; + private Integer totalLogs; private List logs; public LogMonitoringVO() { diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java index 275add2..b1af638 100644 --- 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java @@ -17,10 +17,14 @@ package org.dubhe.k8s.enums; +import lombok.Getter; import org.dubhe.biz.base.constant.ApplicationNameConst; import org.dubhe.biz.base.enums.BizEnum; import org.dubhe.biz.base.utils.StringUtils; +import java.util.ArrayList; +import java.util.List; + import static org.dubhe.biz.base.constant.SymbolConstant.BLANK; /** @@ -52,7 +56,10 @@ public enum BusinessLabelServiceNameEnum { * 专业版终端 */ TERMINAL(BizEnum.TERMINAL.getBizCode(), ApplicationNameConst.TERMINAL), - ; + /** + * TADL + */ + TADL(BizEnum.TADL.getBizCode(), ApplicationNameConst.SERVER_TADL); /** * 业务标签 */ @@ -74,21 +81,34 @@ public enum BusinessLabelServiceNameEnum { this.businessLabel = businessLabel; this.serviceName = serviceName; } - public static String getServiceNameByBusinessLabel(String businessLabel){ + + public static String getServiceNameByBusinessLabel(String businessLabel) { for (BusinessLabelServiceNameEnum businessLabelServiceNameEnum : BusinessLabelServiceNameEnum.values()) { - if (StringUtils.equals(businessLabel, businessLabelServiceNameEnum.getBusinessLabel() )){ + if (StringUtils.equals(businessLabel, businessLabelServiceNameEnum.getBusinessLabel())) { return businessLabelServiceNameEnum.getServiceName(); } } return BLANK; } - public static String getBusinessLabelByServiceName(String serviceName){ + public static String getBusinessLabelByServiceName(String serviceName) { for (BusinessLabelServiceNameEnum businessLabelServiceNameEnum : BusinessLabelServiceNameEnum.values()) { - if (StringUtils.equals(serviceName, businessLabelServiceNameEnum.getServiceName() )){ + if (StringUtils.equals(serviceName, businessLabelServiceNameEnum.getServiceName())) { return businessLabelServiceNameEnum.getBusinessLabel(); } } return BLANK; } + + /** + * 获取需要持久化事件的业务类型 + * @return + */ + public 
static List getEventBusinessList() { + return new ArrayList(){{ + add(NOTEBOOK.businessLabel); + add(TERMINAL.businessLabel); + add(TRAIN.businessLabel); + }}; + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/K8sEventTypeEnum.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/K8sEventTypeEnum.java new file mode 100644 index 0000000..7e72b71 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/K8sEventTypeEnum.java @@ -0,0 +1,71 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.k8s.enums; + +import lombok.Getter; +import lombok.Setter; +import org.dubhe.biz.base.constant.SymbolConstant; + +/** + * @description: event types recognised from the reason carried by a k8s callback + * @date: 2021/11/15 + */ +public enum K8sEventTypeEnum { + OOMKilled("OOMKilled", "请检查内存配置是否满足运行要求"), + COMPLETED("Completed", "pod正常终止事件"), + ; + + /** + * reason reported by the k8s callback + */ + @Getter + @Setter + private String reason; + + @Getter + @Setter + private String message; + + K8sEventTypeEnum(String reason, String message) { + this.reason = reason; + this.message = message; + } + + /** + * Resolve the enum constant whose reason equals the given event reason + * @param reason event reason to look up + * @return the matching constant, or COMPLETED when no constant matches + */ + public static K8sEventTypeEnum to(String reason) { + for (K8sEventTypeEnum type : K8sEventTypeEnum.values()) { + if (type.getReason().equals(reason)) { + return type; + } + } + return K8sEventTypeEnum.COMPLETED; + } + + /** + * Build the full message: reason + SymbolConstant.COLON + SymbolConstant.SPACE + message + * @param typeEnum event type to render + * @return the concatenated message string + */ + public static String buildMessage(K8sEventTypeEnum typeEnum) { + return typeEnum.getReason() + SymbolConstant.COLON + SymbolConstant.SPACE + typeEnum.getMessage(); + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/LimitsOfResourcesEnum.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/LimitsOfResourcesEnum.java index f91f29f..3cadd9c 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/LimitsOfResourcesEnum.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/LimitsOfResourcesEnum.java @@ -37,9 +37,8 @@ public enum LimitsOfResourcesEnum { /** * gpu不足 */ - LIMITS_OF_GPU(3, "gpu用量超限"), - ; - + LIMITS_OF_GPU(3, "gpu用量超限"); + LimitsOfResourcesEnum(int code, String message) { this.code = code; this.message = message; diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/listener/DefaultPodExecListener.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/listener/DefaultPodExecListener.java new file mode 100644 index 0000000..2d497e3 
--- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/listener/DefaultPodExecListener.java @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.k8s.listener; + +import io.fabric8.kubernetes.client.dsl.ExecListener; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import okhttp3.Response; +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; + +import java.util.concurrent.CountDownLatch; + + /** + * @description Default pod exec listener: logs open/close at info and failure at error, counting down the latch on failure and on close + * @date 2020-09-23 + */ + @Slf4j + @Getter + public class DefaultPodExecListener implements ExecListener { + + /** + * Pod name + */ + private String podName; + + /** + * Namespace + */ + private String namespace; + + /** + * Container name + */ + private String containerName; + + /** + * Execution latch used for inter-thread signalling; may be null, in which case it is skipped + */ + private CountDownLatch execLatch; + + public DefaultPodExecListener(String podName, String namespace, String containerName, CountDownLatch execLatch) { + this.podName = podName; + this.namespace = namespace; + this.containerName = containerName; + this.execLatch = execLatch; + } + + @Override + public void onOpen(Response response) { + LogUtil.info(LogEnum.BIZ_K8S,"shell environment in pod '{}', namespace '{}' is opened", podName, namespace); + LogUtil.info(LogEnum.BIZ_K8S,"onOpen: {}", response); + } + + @Override + public void 
onFailure(Throwable t, Response response) { + LogUtil.error(LogEnum.BIZ_K8S,"shell environment in pod '{}', namespace '{}' barfed", podName, namespace); + LogUtil.error(LogEnum.BIZ_K8S,"onFailure: {} {}", t.getMessage(), response); + if (execLatch != null) { + execLatch.countDown(); + } + } + + @Override + public void onClose(int code, String reason) { + LogUtil.info(LogEnum.BIZ_K8S,"shell environment in pod '{}', namespace '{}' closed", podName, namespace); + LogUtil.info(LogEnum.BIZ_K8S,"onClose: {} {}", code, reason); + if (execLatch != null) { + execLatch.countDown(); + } + } + } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sCallbackEventService.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sCallbackEventService.java new file mode 100644 index 0000000..e05720a --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sCallbackEventService.java @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.service; + +import org.dubhe.k8s.domain.entity.K8sCallbackEvent; +import org.dubhe.k8s.domain.vo.K8sEventVO; +import org.dubhe.k8s.domain.vo.K8sResourceEventResultVO; + +import java.util.List; + +/** + * @description: + * @date: 2021/11/15 + */ +public interface K8sCallbackEventService { + /** + * 插入k8s callback event事件的信息 + * @param k8sCallbackEvent + * @return + */ + boolean insertOrUpdate(K8sCallbackEvent k8sCallbackEvent); + + + /** + * 根据resourceName列表查询对应的历史事件,返回resourceName和事件列表对应关系 + * @param resourceNames + * @return + */ + List batchQueryByResourceName(List resourceNames); + + /** + * 根据resourceName列表查询对应的所有历史事件 + * @param resourceNames + * @return + */ + List queryByResourceName(List resourceNames); + + /** + * 根据resourceName和businessType删除单条记录 + * @param resourceName + * @param businessType + * @return + */ + boolean delete(String resourceName, String businessType); +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sGpuConfigService.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sGpuConfigService.java new file mode 100644 index 0000000..c791d5b --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sGpuConfigService.java @@ -0,0 +1,60 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.service; + +import org.dubhe.k8s.domain.dto.K8sGpuConfigDTO; +import org.dubhe.k8s.domain.entity.K8sGpuConfig; + +import java.util.List; + +/** + * @description 用户GPU配置管理服务接口 + * @date 2021-9-6 + */ +public interface K8sGpuConfigService { + + /** + * 根据用户 namespace 查询用户配置 + * + * @param namespace 命名空间 + * @return List 用户配置 VO + */ + List findGpuConfig(String namespace); + + /** + * 获取用户显卡资源限制配置 + * @param namespace 命名空间 + * @param gpuModel GPU型号 + * @param k8sLabelKey k8s GPU资源标签key值 + * @return 用户显卡资源限制配置,单位:卡 + */ + Integer getGpuLimit(String namespace,String gpuModel,String k8sLabelKey); + + + /** + * 创建或更新k8s GPU配置 + * @param k8sGpuConfigDTO k8s GPU配置实体 + * @return + */ + void UpdateGpuConfig(K8sGpuConfigDTO k8sGpuConfigDTO); + + /** + * 删除k8s资源配置 + * @param namespaces 命名空间 + */ + void delete(List namespaces); +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sNodeService.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sNodeService.java new file mode 100644 index 0000000..fe89491 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/K8sNodeService.java @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.k8s.service; + +import org.dubhe.k8s.domain.bo.K8sTaskBO; +import org.dubhe.k8s.domain.entity.K8sNode; +import org.dubhe.k8s.domain.entity.K8sTask; + +import java.util.List; + +/** + * @description Service for k8s cluster node information + * @date 2021-09-16 + */ +public interface K8sNodeService { + /** + * Create or update + * + * @param k8sNode node record to create or update + * @return int number of rows inserted + */ + int createOrUpdateTask(K8sNode k8sNode); + + /** + * Update + * + * @param k8sNode node record to update + * @return int number of rows updated + */ + int update(K8sNode k8sNode); + + /** + * Query by namespace and resourceName + * @param k8sTask query conditions (presumably namespace and resourceName are read from it; confirm in the implementation) + * @return the matching records + */ + List selectByNamespaceAndResourceName(K8sTask k8sTask); + + /** + * Query unexecuted tasks by condition + * @param k8sTaskBO k8s task parameters + * @return List collection of k8s tasks + */ + List selectUnexecutedTask(K8sTaskBO k8sTaskBO); + + /** + * Query unexecuted tasks + * @return List collection of k8s tasks + */ + List selectUnexecutedTask(); + /** + * Add a task to the redis delay queue + * @param k8sTask task to enqueue + * @return boolean result of the operation (NOTE(review): exact meaning is not visible here; confirm in the implementation) + */ + + boolean addRedisDelayTask(K8sTask k8sTask); + + /** + * Load tasks into the delay queue + */ + void loadTaskToRedis(); + + /** + * Delete by namespace and resourceName + * + * @param namespace namespace + * @param resourceName resource name + * @return boolean + */ + boolean deleteByNamespaceAndResourceName(String namespace,String resourceName); +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sCallbackEventServiceImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sCallbackEventServiceImpl.java new file mode 100644 index 0000000..03c6271 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sCallbackEventServiceImpl.java @@ -0,0 +1,139 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.k8s.service.impl; + +import cn.hutool.core.collection.CollectionUtil; +import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; +import org.apache.commons.compress.utils.Lists; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.k8s.dao.K8sCallbackEventMapper; +import org.dubhe.k8s.domain.entity.K8sCallbackEvent; +import org.dubhe.k8s.domain.vo.K8sEventVO; +import org.dubhe.k8s.domain.vo.K8sResourceEventResultVO; +import org.dubhe.k8s.service.K8sCallbackEventService; +import org.springframework.stereotype.Service; + +import javax.annotation.Resource; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * @description: K8sCallbackEventService implementation that reads and writes callback events through K8sCallbackEventMapper + * @date: 2021/11/15 + */ +@Service +public class K8sCallbackEventServiceImpl implements K8sCallbackEventService { + @Resource + private K8sCallbackEventMapper k8sCallbackEventMapper; + + /** Update the non-deleted record matching resource_name, container_id, event_type and business_type; insert the event when the update touches no row. + * @see K8sCallbackEventService#insertOrUpdate(K8sCallbackEvent) + */ + @Override + public boolean insertOrUpdate(K8sCallbackEvent k8sCallbackEvent) { + QueryWrapper queryWrapper = new QueryWrapper<>(); + queryWrapper.eq("resource_name", k8sCallbackEvent.getResourceName()); + queryWrapper.eq("container_id", k8sCallbackEvent.getContainerId()); + queryWrapper.eq("deleted", MagicNumConstant.ZERO); + queryWrapper.eq("event_type", k8sCallbackEvent.getEventType()); + queryWrapper.eq("business_type", k8sCallbackEvent.getBusinessType()); +
if (k8sCallbackEventMapper.update(k8sCallbackEvent, queryWrapper) > 0) { + return true; + } else { + return k8sCallbackEventMapper.insert(k8sCallbackEvent) > 0; + } + } + + /** Select the non-deleted events whose resource_name is in the list, group them by resource name and build one result VO per name; returns an empty list when nothing is found. + * @see K8sCallbackEventService#batchQueryByResourceName(List) + */ + @Override + public List batchQueryByResourceName(List resourceNames) { + QueryWrapper queryWrapper = new QueryWrapper(); + queryWrapper.in("resource_name", resourceNames); + queryWrapper.eq("deleted", MagicNumConstant.ZERO); + List callbackEvents = k8sCallbackEventMapper.selectList(queryWrapper); + if (CollectionUtil.isEmpty(callbackEvents)) { + return Lists.newArrayList(); + } + + Map> k8sCallbackEventMap = new HashMap<>(); + callbackEvents.forEach(e -> { + if (k8sCallbackEventMap.containsKey(e.getResourceName())) { + List events = k8sCallbackEventMap.get(e.getResourceName()); + events.add(e); + } else { + k8sCallbackEventMap.put(e.getResourceName(), new ArrayList() {{add(e);}}); + } + }); + + return k8sCallbackEventMap.entrySet().stream().map(entry -> { + K8sResourceEventResultVO resultVO = new K8sResourceEventResultVO(); + resultVO.setResourceName(entry.getKey()); + resultVO.setEventVOList(convert(entry.getValue())); + return resultVO; + }).collect(Collectors.toList()); + } + + /** Select the non-deleted events whose resource_name is in the list and convert them to VOs; returns an empty list when nothing is found. + * @see K8sCallbackEventService#queryByResourceName(List) + */ + @Override + public List queryByResourceName(List resourceNames) { + QueryWrapper queryWrapper = new QueryWrapper(); + queryWrapper.in("resource_name", resourceNames); + queryWrapper.eq("deleted", MagicNumConstant.ZERO); + List callbackEvents = k8sCallbackEventMapper.selectList(queryWrapper); + if (CollectionUtil.isEmpty(callbackEvents)) { + return Lists.newArrayList(); + } + return convert(callbackEvents); + } + + /** Delete through the mapper using resource_name and business_type as conditions; true when the mapper reports more than zero affected rows. + * @see K8sCallbackEventService#delete(String, String) + */ + @Override + public boolean delete(String resourceName, String businessType) { + QueryWrapper queryWrapper = new QueryWrapper<>(); + queryWrapper.eq("resource_name", resourceName); +
queryWrapper.eq("business_type", businessType); + return k8sCallbackEventMapper.delete(queryWrapper) > 0; + } + + /** + * Convert database entities into VO objects (copies event type, message, resource name and start time) + * + * @param callbackEvents entities to convert + * @return one K8sEventVO per entity, in the same order + */ + private List convert(List callbackEvents) { + return callbackEvents.stream().map( e -> + { + K8sEventVO k8sEventVO = new K8sEventVO(); + k8sEventVO.setType(e.getEventType()); + k8sEventVO.setMessage(e.getMessage()); + k8sEventVO.setResourceName(e.getResourceName()); + k8sEventVO.setStartTime(e.getStartTime()); + return k8sEventVO; + } + ).collect(Collectors.toList()); + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sGpuConfigServiceImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sGpuConfigServiceImpl.java new file mode 100644 index 0000000..9f80d21 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/service/impl/K8sGpuConfigServiceImpl.java @@ -0,0 +1,121 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.k8s.service.impl; + +import cn.hutool.core.collection.CollectionUtil; +import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; +import org.dubhe.k8s.dao.K8sGpuConfigMapper; +import org.dubhe.k8s.domain.dto.K8sGpuConfigDTO; +import org.dubhe.k8s.domain.entity.K8sGpuConfig; +import org.dubhe.k8s.service.K8sGpuConfigService; +import org.springframework.beans.BeanUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; + +import java.util.List; +import java.util.stream.Collectors; + +/** + * @description Implementation of the user GPU configuration management service interface + * @date 2021-9-6 + */ +@Service +public class K8sGpuConfigServiceImpl implements K8sGpuConfigService { + + @Autowired + private K8sGpuConfigMapper k8sGpuConfigMapper; + + /** + * Query the user's GPU configuration by the user's namespace + * + * @param namespace namespace + * @return List of user configurations + */ + @Override + public List findGpuConfig(String namespace) { + List k8sGpuConfigs = k8sGpuConfigMapper.selectList(new QueryWrapper<>(new K8sGpuConfig().setNamespace(namespace))); + // If a legacy user has not initialized a GPU configuration, return the default configuration (the rows stored under namespace-0) + if (CollectionUtils.isEmpty(k8sGpuConfigs) && k8sGpuConfigMapper.selectCountByNamespace(namespace) == 0) { + List preUserGpuConfigs = k8sGpuConfigMapper.selectList(new QueryWrapper<>(new K8sGpuConfig().setNamespace("namespace-0"))); + if (CollectionUtil.isNotEmpty(preUserGpuConfigs)) { + k8sGpuConfigs.addAll(preUserGpuConfigs); + } + } + return k8sGpuConfigs; + } + + /** + * Get the user's GPU resource limit configuration + * @param namespace namespace + * @param gpuModel GPU model + * @param k8sLabelKey key of the k8s GPU resource label + * @return the user's GPU resource limit, unit: cards + */ + @Override + public Integer getGpuLimit(String namespace, String gpuModel, String k8sLabelKey) { + K8sGpuConfig k8sGpuConfig = k8sGpuConfigMapper.selectOne(new QueryWrapper<>(new K8sGpuConfig().setNamespace(namespace).setGpuModel(gpuModel).setK8sLabelKey(k8sLabelKey)).last("
limit 1 ")); + Integer gpuLimit = 0; + if (k8sGpuConfig != null) { + gpuLimit = k8sGpuConfig.getGpuLimit(); + } + // 如果老用户未初始化GPU配置,则返回默认配置 + if (k8sGpuConfig == null && k8sGpuConfigMapper.selectCountByNamespace(namespace) == 0) { + K8sGpuConfig preK8sGpuConfig = k8sGpuConfigMapper.selectOne(new QueryWrapper<>(new K8sGpuConfig().setNamespace("namespace-0").setGpuModel(gpuModel).setK8sLabelKey(k8sLabelKey))); + if (preK8sGpuConfig != null) { + gpuLimit = preK8sGpuConfig.getGpuLimit(); + } + } + return gpuLimit; + } + + /** + * 创建或更新k8s GPU配置 + * @param k8sGpuConfigDTO k8s GPU配置实体 + * @return + */ + @Override + public void UpdateGpuConfig(K8sGpuConfigDTO k8sGpuConfigDTO) { + if (k8sGpuConfigMapper.selectCount(new QueryWrapper<>(new K8sGpuConfig().setNamespace(k8sGpuConfigDTO.getNamespace()))) > 0) { + k8sGpuConfigMapper.delete(new QueryWrapper<>(new K8sGpuConfig().setNamespace(k8sGpuConfigDTO.getNamespace()))); + } + if (!CollectionUtils.isEmpty(k8sGpuConfigDTO.getGpuResources())) { + List k8sGpuConfigs = k8sGpuConfigDTO.getGpuResources().stream().map(x -> + { + K8sGpuConfig k8sGpuConfig = new K8sGpuConfig(); + BeanUtils.copyProperties(x, k8sGpuConfig); + k8sGpuConfig.setNamespace(k8sGpuConfigDTO.getNamespace()); + return k8sGpuConfig; + }).collect(Collectors.toList()); + k8sGpuConfigMapper.insertBatchs(k8sGpuConfigs); + } + + } + + /** + * 删除k8s资源配置 + * @param namespaces 命名空间 + */ + @Override + public void delete(List namespaces) { + if(CollectionUtil.isNotEmpty(namespaces)){ + QueryWrapper k8sGpuConfigWrapper = new QueryWrapper<>(); + k8sGpuConfigWrapper.in("namespace",namespaces); + k8sGpuConfigMapper.delete(k8sGpuConfigWrapper); + } + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/BizConvertUtils.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/BizConvertUtils.java index 87636bf..16c8074 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/BizConvertUtils.java +++
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/BizConvertUtils.java @@ -191,10 +191,20 @@ public class BizConvertUtils { } /** + * 将List 转为 List + * + * @param serviceList Service 对象集合 + * @return List BizService对象集合 + */ + public static List toBizServiceList(List serviceList) { + return serviceList.parallelStream().map(obj -> toBizService(obj)).collect(Collectors.toList()); + } + + /** * 将Service 转为 BizService * * @param service 对象 - * @return + * @return BizService 对象 */ public static BizService toBizService(Service service) { return MappingUtils.mappingTo(service, BizService.class); diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCallBackTool.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCallBackTool.java index 6f9db2d..1df466f 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCallBackTool.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCallBackTool.java @@ -73,7 +73,7 @@ public class K8sCallBackTool { * k8s 回调路径 */ private static final String K8S_CALLBACK_PATH_DEPLOYMENT = "/api/k8s/callback/deployment/"; - public static final String K8S_CALLBACK_PATH_POD = StringConstant.K8S_CALLBACK_URI+ SymbolConstant.SLASH; + public static final String K8S_CALLBACK_PATH_POD = StringConstant.K8S_CALLBACK_URI + SymbolConstant.SLASH; static { K8S_CALLBACK_PATH = new ArrayList<>(); @@ -113,7 +113,7 @@ public class K8sCallBackTool { */ public boolean validateToken(String token) { String expireTime = AesUtil.decrypt(token, secretKey); - if (StringUtils.isEmpty(expireTime)){ + if (StringUtils.isEmpty(expireTime)) { return false; } String nowTime = DateUtil.format( @@ -141,7 +141,7 @@ public class K8sCallBackTool { * @return String */ public String getPodCallbackUrl(String podLabel) { - return "http://"+BusinessLabelServiceNameEnum.getServiceNameByBusinessLabel(podLabel) + K8S_CALLBACK_PATH_POD + podLabel; + return "http://" + 
BusinessLabelServiceNameEnum.getServiceNameByBusinessLabel(podLabel) + K8S_CALLBACK_PATH_POD + podLabel; } /** @@ -151,7 +151,7 @@ public class K8sCallBackTool { * @return String */ public String getDeploymentCallbackUrl(String businessLabel) { - return "http://"+BusinessLabelServiceNameEnum.getServiceNameByBusinessLabel(businessLabel) + K8S_CALLBACK_PATH_DEPLOYMENT + businessLabel; + return "http://" + BusinessLabelServiceNameEnum.getServiceNameByBusinessLabel(businessLabel) + K8S_CALLBACK_PATH_DEPLOYMENT + businessLabel; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCommonUtils.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCommonUtils.java new file mode 100644 index 0000000..c83c47c --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sCommonUtils.java @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.k8s.utils; + +import io.fabric8.kubernetes.api.model.Quantity; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.k8s.constant.K8sParamConstants; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.Map; + +/** + * @description General-purpose k8s utility class; its behavior is switched by the rdma.enable property + * @date 2021-09-06 + */ +@Component +public class K8sCommonUtils { + @Value("${rdma.enable}") + private Boolean rdmaEnable; + + /** + * Add the RDMA resource to the given map; does nothing when rdma.enable is false or the map is null + * + * @param resourcesMap resource map that receives the RDMA_HCA_RESOURCE_KEY entry + */ + public void addRdmaResource(Map resourcesMap) { + if (rdmaEnable && resourcesMap != null) { + resourcesMap.put(K8sParamConstants.RDMA_HCA_RESOURCE_KEY, new Quantity(String.valueOf(MagicNumConstant.ONE))); + } + } + + /** + * Get the RDMA resource + * + * @return Map resource map holding the single RDMA_HCA_RESOURCE_KEY entry, or null when rdma.enable is false + */ + public Map getRdmaResource() { + if (rdmaEnable) { + return new HashMap() { + { + put(K8sParamConstants.RDMA_HCA_RESOURCE_KEY, new Quantity(String.valueOf(MagicNumConstant.ONE))); + } + }; + } + return null; + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/PrometheusUtil.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/PrometheusUtil.java index d0df8f2..732aebd 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/PrometheusUtil.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/PrometheusUtil.java @@ -26,8 +26,11 @@ import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.k8s.constant.K8sParamConstants; import
org.dubhe.k8s.domain.bo.PrometheusMetricBO; +import org.dubhe.k8s.domain.bo.PromethusNodeMetricsBo; import org.dubhe.k8s.domain.dto.PodQueryDTO; +import org.dubhe.k8s.domain.vo.GpuUsageVO; +import java.util.List; import java.util.Map; /** @@ -41,18 +44,18 @@ public class PrometheusUtil { * @param paramMap * @return */ - public static PrometheusMetricBO getQuery(String url,Map paramMap){ - if (StringUtils.isEmpty(url)){ + public static PrometheusMetricBO getQuery(String url, Map paramMap) { + if (StringUtils.isEmpty(url)) { return null; } try { - String metricStr = HttpUtil.get(url,paramMap); - if (StringUtils.isEmpty(metricStr)){ + String metricStr = HttpUtil.get(url, paramMap); + if (StringUtils.isEmpty(metricStr)) { return null; } return JSON.parseObject(metricStr, PrometheusMetricBO.class); - }catch (Exception e){ - LogUtil.error(LogEnum.BIZ_K8S, "getQuery url:{} paramMap:{} error:{}", url,paramMap,e.getMessage(),e); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "getQuery url:{} paramMap:{} error:{}", url, paramMap, e.getMessage(), e); return null; } } @@ -64,23 +67,23 @@ public class PrometheusUtil { * @param podQueryDTO 查询参数 * @return */ - public static Map getQueryParamMap(String param,String podName,PodQueryDTO podQueryDTO){ + public static Map getQueryParamMap(String param, String podName, PodQueryDTO podQueryDTO) { Map paramMap = new HashedMap<>(MagicNumConstant.EIGHT); - if (StringUtils.isEmpty(param) || StringUtils.isEmpty(podName)){ + if (StringUtils.isEmpty(param) || StringUtils.isEmpty(podName)) { return paramMap; } - paramMap.put(StringConstant.QUERY, param.replace(K8sParamConstants.POD_NAME_PLACEHOLDER,podName)); - if (podQueryDTO == null){ + paramMap.put(StringConstant.QUERY, param.replace(K8sParamConstants.POD_NAME_PLACEHOLDER, podName)); + if (podQueryDTO == null) { return paramMap; } - if (podQueryDTO.getStartTime() != null){ - paramMap.put(StringConstant.START_LOW,podQueryDTO.getStartTime()); + if (podQueryDTO.getStartTime() != 
null) { + paramMap.put(StringConstant.START_LOW, podQueryDTO.getStartTime()); } - if (podQueryDTO.getEndTime() != null){ - paramMap.put(StringConstant.END_LOW,podQueryDTO.getEndTime()); + if (podQueryDTO.getEndTime() != null) { + paramMap.put(StringConstant.END_LOW, podQueryDTO.getEndTime()); } - if (podQueryDTO.getStep() != null){ - paramMap.put(StringConstant.STEP_LOW,podQueryDTO.getStep()); + if (podQueryDTO.getStep() != null) { + paramMap.put(StringConstant.STEP_LOW, podQueryDTO.getStep()); } return paramMap; } @@ -91,12 +94,80 @@ public class PrometheusUtil { * @param podName pod名称 * @return */ - public static Map getQueryParamMap(String param,String podName){ + public static Map getQueryParamMap(String param, String podName) { Map paramMap = new HashedMap<>(MagicNumConstant.TWO); - if (StringUtils.isEmpty(param) || StringUtils.isEmpty(podName)){ + if (StringUtils.isEmpty(param) || StringUtils.isEmpty(podName)) { return paramMap; } - paramMap.put(StringConstant.QUERY, param.replace(K8sParamConstants.POD_NAME_PLACEHOLDER,podName)); + paramMap.put(StringConstant.QUERY, param.replace(K8sParamConstants.POD_NAME_PLACEHOLDER, podName)); + return paramMap; + } + + /** + * prometheus get node使用率查询 + * @param url + * @param paramMap + * @return List node上gpu使用率统计 + */ + public static Map> getPrometheusQuery(String url, Map paramMap) { + if (StringUtils.isEmpty(url)) { + return null; + } + try { + String metricStr = HttpUtil.get(url, paramMap); + if (StringUtils.isEmpty(metricStr)) { + return null; + } + PromethusNodeMetricsBo promethusNodeMetricsBo = JSON.parseObject(metricStr, PromethusNodeMetricsBo.class); + return promethusNodeMetricsBo.getGpuUsageResults(); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "getQuery url:{} paramMap:{} error:{}", url, paramMap, e.getMessage(), e); + return null; + } + } + + /** + * 组装参数 + * @param param 查询表达式 + * @return + */ + public static Map getQueryNodeParamMap(String param) { + Map paramMap = new 
HashedMap<>(MagicNumConstant.TWO); + + paramMap.put(StringConstant.QUERY, param); + return paramMap; + } + + /** + * 组装参数 + * @param param 查询表达式 + * @param sumDay 统计时间段 + * @return + */ + public static Map getResourceUsageRateParamMap(String param, String sumDay) { + Map paramMap = new HashedMap<>(MagicNumConstant.TWO); + if (StringUtils.isEmpty(param) || StringUtils.isEmpty(sumDay)) { + return paramMap; + } + paramMap.put(StringConstant.QUERY, param.replace(K8sParamConstants.SUM_DAY, sumDay)); + + return paramMap; + } + + /** + * 组装参数 + * @param param 查询表达式 + * @param sumDay 统计时间段 + * @param namespaces 用户命名空间 + * @return Map + */ + public static Map getResourceUsageParamMap(String param, String sumDay, String namespaces) { + Map paramMap = new HashedMap<>(MagicNumConstant.TWO); + if (StringUtils.isEmpty(param) || StringUtils.isEmpty(sumDay)) { + return paramMap; + } + paramMap.put(StringConstant.QUERY, param.replace(K8sParamConstants.SUM_DAY, sumDay).replace(K8sParamConstants.NAMESPACES, namespaces)); + return paramMap; } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java index 03e0d9d..3d173b6 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java @@ -41,7 +41,9 @@ import io.fabric8.kubernetes.api.model.extensions.IngressRule; import io.fabric8.kubernetes.api.model.extensions.IngressRuleBuilder; import io.fabric8.kubernetes.api.model.extensions.IngressTLS; import io.fabric8.kubernetes.api.model.extensions.IngressTLSBuilder; +import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.constant.SymbolConstant; +import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.bo.BuildIngressBO; import 
org.dubhe.k8s.domain.bo.BuildServiceBO; @@ -57,11 +59,11 @@ import org.dubhe.k8s.enums.ShellCommandEnum; import org.springframework.util.CollectionUtils; import org.springframework.util.StringUtils; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; + +import static org.dubhe.biz.base.constant.MagicNumConstant.ZERO_LONG; + +import static org.dubhe.biz.base.constant.MagicNumConstant.ZERO_LONG; /** * @description 构建 Kubernetes 资源对象 @@ -240,6 +242,11 @@ public class ResourceBuildUtils { Map childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(),bo.getTaskIdentifyLabel()); LabelSelector labelSelector = new LabelSelector(); labelSelector.setMatchLabels(childLabels); + Map gpuLabel = new HashMap<>(2); + if(bo.getGpuNum()> MagicNumConstant.ZERO){ + gpuLabel.put(K8sLabelConstants.NODE_GPU_LABEL_KEY, K8sLabelConstants.NODE_GPU_LABEL_VALUE); + gpuLabel.put(K8sLabelConstants.NODE_GPU_MODEL_LABEL_KEY, bo.getGpuModel()); + } return new DeploymentBuilder() .withNewMetadata() .withName(deploymentName) @@ -256,7 +263,9 @@ public class ResourceBuildUtils { .withNamespace(bo.getNamespace()) .endMetadata() .withNewSpec() + .withTerminationGracePeriodSeconds(ZERO_LONG) .addToNodeSelector(K8sUtils.gpuSelector(bo.getGpuNum())) + .addToNodeSelector(gpuLabel) .addToContainers(buildContainer(bo, volumeVO, deploymentName)) .addToVolumes(volumeVO.getVolumes().toArray(new Volume[0])) .withRestartPolicy(RestartPolicyEnum.ALWAYS.getRestartPolicy()) @@ -276,12 +285,17 @@ public class ResourceBuildUtils { public static Container buildContainer(DeploymentBO bo, VolumeVO volumeVO, String name) { Map resourcesLimitsMap = Maps.newHashMap(); Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); - 
Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); + if (bo.getGpuNum() != null && bo.getGpuNum() > MagicNumConstant.ZERO && !StringUtils.isEmpty(bo.getK8sLabelKey())){ + resourcesLimitsMap.put(bo.getK8sLabelKey(), new Quantity(String.valueOf(bo.getGpuNum()))); + } + if(!CollectionUtils.isEmpty(bo.getCustomResourcesLimitsMap())){ + resourcesLimitsMap.putAll(bo.getCustomResourcesLimitsMap()); + } Container container = new ContainerBuilder() .withNewName(name) .withNewImage(bo.getImage()) - .withNewImagePullPolicy(ImagePullPolicyEnum.IFNOTPRESENT.getPolicy()) + .withNewImagePullPolicy(StringUtils.isEmpty(bo.getImagePullPolicy())?ImagePullPolicyEnum.IFNOTPRESENT.getPolicy():bo.getImagePullPolicy()) .withVolumeMounts(volumeVO.getVolumeMounts()) .withNewResources().addToLimits(resourcesLimitsMap).endResources() .build(); diff --git a/dubhe-server/common-k8s/src/main/resources/key/id_rsa b/dubhe-server/common-k8s/src/main/resources/key/id_rsa new file mode 100644 index 0000000..a7bbbc5 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/key/id_rsa @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEA06ZOLQq4pzBZL+bybsxdl4PzYg3jB4kRVc771nm5Y8JenDAT +hlOTz6+nGH4EDT63J7oNj4JYLufsONKYhJkya8p0btWeKHqz5LgEfLGwz/FTMRH5 +WTCZCZUa/3i9gQeKK/CKEned1h4l2w4agrYrnXHpnuNSw6HSlTpX8FgaQGfmTkL3 +XtzSCeY9F2fXGOm9fMfVmv5I5uP6B4TmKwtWPvx3a/1MDgHbmtoaCqYP/JmzWHyi +mc9l2ilX3kTPxh57oRtW9N3FATc8/OCYkNt4vDUTRVB4drODaR5TgUbFtkBVGcFR +f7MrQo4Krd2g8rtEv7PaWN/wlNle5ANXJ/oL3wIDAQABAoIBADiqC8APYMSSMy6Z +/EohuOT51M1pvmCkF9oLYm1XhYTp4v6Z+IA8HBS8iFYMVvVc1xhxvXOwh/925E2K +RH8rrM4jE+0gkAlyYHtZsQnZYOcrSwSWNVXlpvNj0iiXoNTMufdtnOm40K8kvynY +qsxYDXFHsC5z2hK6XnDJgAW+8LhRHCizWwxc0dSN9r33VGry0rgndUZsj2ZBf7u5 
+rdslZKvRzMymXct7CIQQ3s5IUO3qbaj7TIzMIo14bmHgD3zlBQ66ESCX1o5A+hPq +1gfUNqUPBtJhsNJg4YYJ/bGgGhBxAxam8jWz3DFZEuYHr6fCDIhLJzL5ulxoQS2z +vJYBwsECgYEA8JGfw004BxqcBVxqBveestsCVGIWDtb+Zx4OI+uBAmYMXd2WCzxv +XxgQJ/IrpNx6FAXZ/bFdE0HRZWR6H07wtNgABuBgd0tAfcH8sw2CJkTO/0N2Xr6/ +O4kh3yHNMy/wAxnktISf1hE/ElEdPI6slhwGDQObRdXxaqBEq+Tjc28CgYEA4TnM +rCaJ8aMaUE0nvVzrev3VTLp4f1qOcPUOnrHDdyrPs1SjYzmAOC72X/FylJZmtkvh +coMQUKVQgiBn1dTtnALANq705b1S+0U07m6+dGJ7LWchOY2tFPiIsx3SZvNJeEKJ +38PsaFi2eDcDP8cKriNoAoby8TbqjqiyHgDX9pECfxww9IfuhKJQe/gk3Ef0vKQ5 +BgzdcbhLeYScAQw0jOm7C7f0P6ERc/uw1jPYLUUkkSnHhcQ1BLM9A0zeeXExzwNi +TJ6BrMxOBUC3euWAr7/MUHWZckWoFMDlURLU4zccZwP2BNcis5hibQG4f7SZA6CT +qCHeSlPkvmXAYkvChuUCgYEA0DNlL9KkfBqBja/1R4jpKhYSIs7R6zCkMmlm7W54 +ueV6gVWBgI08KTPIj2KcwBzUsDovG3NrFpHrfY9FTZd7W1fzpdlQDDxaxGryhmMb +bm1HXu5R+WktkhA6FhJAWOkXhrNDzvXHyaIQc8qvFzsBdX7HfGaRmEhixiPOHAw9 +l/ECgYEAwNywUARR9HtmgoyrwifrzIkMo6jcmLNEIzi2kJ4OQQxW5eKj5JgSV0ND +QUoAIWDAhHQd3ygSfbeShcvtcw+zoF92iOVFn0SLiSe1TgA5ggzC/VJUnInO7zx7 +8Sj8Zk5tHrVmTlelEA2Nbq5H7/U1Q33c1AWbw8yxqD/JRxudHKA= +-----END RSA PRIVATE KEY----- \ No newline at end of file diff --git a/dubhe-server/common-k8s/src/main/resources/key/id_rsa.pub b/dubhe-server/common-k8s/src/main/resources/key/id_rsa.pub new file mode 100644 index 0000000..a356ff4 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/key/id_rsa.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDTpk4tCrinMFkv5vJuzF2Xg/NiDeMHiRFVzvvWebljwl6cMBOGU5PPr6cYfgQNPrcnug2Pglgu5+w40piEmTJrynRu1Z4oerPkuAR8sbDP8VMxEflZMJkJlRr/eL2BB4or8IoSd53WHiXbDhqCtiudceme41LDodKVOlfwWBpAZ+ZOQvde3NIJ5j0XZ9cY6b18x9Wa/kjm4/oHhOYrC1Y+/Hdr/UwOAdua2hoKpg/8mbNYfKKZz2XaKVfeRM/GHnuhG1b03cUBNzz84JiQ23i8NRNFUHh2s4NpHlOBRsW2QFUZwVF/sytCjgqt3aDyu0S/s9pY3/CU2V7kA1cn+gvf root@{{ip}} \ No newline at end of file diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_ai b/dubhe-server/common-k8s/src/main/resources/kubeconfig_ai new file mode 100644 index 0000000..f0f298b --- /dev/null +++ 
b/dubhe-server/common-k8s/src/main/resources/kubeconfig_ai @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUN5RENDQWJDZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1Ea3hNVEUxTkRRek1Gb1hEVE14TURrd09URTFORFF6TUZvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTEM3Cm9mK2VEVlRtUllJNEdVS01oLzNGQ3R4TU4vMGlFTVd6OXVydytMbWt4Wll4TExObC9odVhhZllQUVhINTh3bGkKaWVIUnI1N25Na3ZpS2dKbTdjM0dmNFNwL3ZwUVFReW1qYWxiWWdYakp1RGRyNHZCUWQ4Szd0RHU1NytTTmxIdwp5ZStCKzd5MEpxdlI0Sm1OQWk1bHp4Szd4ZmlFU2dHQ2xxcU9XRy91dDlFdTBPL1orWUJ5VExGbXpkdmhlSUVlCnlCZGxGS2FsZmhtbWlaRUFSRFBReldRcnJWSTRkNTFhbmQyVVRLMXFnZC9uaERSaHUvUXNWQUhmaUZnOWJpclAKcmJXelR0d0lkNG9WbUZYTm5wWU5UU0FBeUh0NTZReGpwRys5b0lSbXR2MjN0TitoQkpxZ0duVmt3U2NZNkhyaQovR0MxQUJJdG90cTZPZEc0MjM4Q0F3RUFBYU1qTUNFd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFJdFd5NmFJbUZUVEcrZUdxL21kOWkzUitJSkEKaHJieGF3NkZiVFhiUEswMkhEcXIwdmNqUy94T0huRWt2YkY5RFhralFLU3ppaHJRbFd0RjQza282TE5hWStFVQplaExYbkRQemdPT3FVcnVoUHJyMTZXRG1MeTg3ODJ0NjdtT1dSbHhXWVJheExxTWtZYmc1UXlSVzZQeHRRc01NCnY2N3dFT3NYc0NtWUlEanhrUFR6YUkrbkIyT0IvMFFHWFk5Yk5HTWN3NzRkZHg2dUgyTUtmR0lZdmdaNVAxdkQKSnl3eTk4eDlKamJVN1JuZW5uQzBmR1NlUHl3c1hBa2duY21uZVYwM2tnS1VjWnZIQmlpeWlhRXFJQ0dTcS9jagowS25Ca3B0Q0IwajN4WFhUOUNDUkUwZktTamR5VXR1Z0liN0d1bnVRK1M4WjBvbWV5NTFZOEQwb0ljdz0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + server: https://10.105.0.87:8443 + name: kubernetes +contexts: +- context: + cluster: kubernetes + user: kubernetes-admin + name: kubernetes-admin@kubernetes +current-context: kubernetes-admin@kubernetes +kind: Config +preferences: {} +users: +- name: kubernetes-admin + user: + client-certificate-data: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM4akNDQWRxZ0F3SUJBZ0lJUzRaYTlyZHhvUGN3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRBNU1URXhOVFEwTXpCYUZ3MHlNakE1TVRFeE5UUTBNekphTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXNyNmt6a1BKNEEyZzh4SHgKLzdjZDlMbTlvTlNWMTViREo3ZitvQms4YThpZXdBeGR6aU9XSWYwcVRrUjByT29NOXRnemhwbEJMNUJzcGVzZApQQk9pSWd3RzZuaEhYdjZUQkh1QXJuTDRwOUFrMzBaeFlFNVRJeG54amdsSXN2L3dUVVdldWNFK21pNk5wSlpDCjh3bE5BVjRFalBlVnR4THRLRC9sUFo0bGM0eUdNYXE0WjZYQ2M1Y21hSTg5REVkZUJ4eGE3YUptdEdsNXRETUwKdmVpNS9LVVhoOVZaV2ZKUHd0S01wYU1VY0hWa2VTUy9uU2I0L3dSRzlVeTF1RDNPMC84S2dTd0dzcUdqVlFJVgpLaG5QejJieThkRzRIL3FVT0lzUzJINnQwZUV5R1ZCZWdUaldMT1pwM2V6REpZQnpKV1NLQVRmYWVBcHEwK0ZVCkYySlhMUUlEQVFBQm95Y3dKVEFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFGbjBwREtEYU80YWhLSHdDUjZUTzltSm1GNTltd05LaGI2NwoyU3hQRWJQbjhqdmJtTmcrUGJzYzExM2txeTFjSXZkNUZ4MDZnMTMyc095Nzl6Skw4NlYyTVZSd2pKbk5JdEVyCnNFVDRwdWtkREdWMTV5NUJsbktjbitybVRFZHg2VVYzUE1rSXowUjc4OHVEUWNyUTBnNjMrUzkxZU1qSDFPVmUKb2RQSjVMblkrRlBzazNBS3RYWitRdE5PeE0vbGwya2ZUZWgzVzNydFFVczRTZU9RVWVsdWhxOTMzV21wYzVnRQpaNUVrbVltcU5VMjJTcTFKRWZvNnFDRlJZaklWSGlLTVFtYVNHc3Jrc3gxT0R2a3FINFY1Vmgxc0hFSlZMNXhnCmJVeXp6SmdPTEFFUGlQb3ZmN3hsM3IxQnJNc01id21veTd5YkR1ZGo4dzM3SkZQZ0dMND0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb3dJQkFBS0NBUUVBc3I2a3prUEo0QTJnOHhIeC83Y2Q5TG05b05TVjE1YkRKN2Yrb0JrOGE4aWV3QXhkCnppT1dJZjBxVGtSMHJPb005dGd6aHBsQkw1QnNwZXNkUEJPaUlnd0c2bmhIWHY2VEJIdUFybkw0cDlBazMwWngKWUU1VEl4bnhqZ2xJc3Yvd1RVV2V1Y0UrbWk2TnBKWkM4d2xOQVY0RWpQZVZ0eEx0S0QvbFBaNGxjNHlHTWFxNApaNlhDYzVjbWFJODlERWRlQnh4YTdhSm10R2w1dERNTHZlaTUvS1VYaDlWWldmSlB3dEtNcGFNVWNIVmtlU1MvCm5TYjQvd1JHOVV5MXVEM08wLzhLZ1N3R3NxR2pWUUlWS2huUHoyYnk4ZEc0SC9xVU9Jc1MySDZ0MGVFeUdWQmUKZ1RqV0xPWnAzZXpESllCekpXU0tBVGZhZUFwcTArRlVGMkpYTFFJREFRQUJBb0lCQVFDY0czNXFHckF6K3hNbgpWdlBvcG5BR0xUWWRRYTBBVmM1cFlQdURyYWFrWjltbDF1Y1NRdWc3RlJVZXVCRENkOE1NenZQcFNPMnljY3RCCitSNVdPcnR1U3R5N2FJTEV4MGQxTWJ2SUgrZGxBSmREWFExbDArSXVGTmx2ZUZKQzZ0NWZUajk2OGZnUHhqcTAKbFBzeHNWek1wZVVKakszMzd4S3JCV3JicnNQOHJqY2pCeVUrR1FBWkV4dGRuTFVFbmhmQVFneU9VLytROXdXLwpQaUVSTW5lVUVtK1laWmRYbGRVbytYMXNPditFZEQvMHROTllMWUZaaTVVSnNmQlh4bElzNVhYQkllNmxlVmRaClpsZHI1THJnbk94RVc1NFlUYUVQTjdLajRDRGpSTy9oR2hYSlhkMXNwQUdSeHYwQy9MVHdHUG1KZnhlWjJmV0YKazMrNUt4UUJBb0dCQU4xY3d5TUJyR2Q3Mk1RVEV2S204dlRnZE0rSTNYb0JBS1NJUzVGRi8rYk9zd096SHUwZQoxM1d4Y1VpaWZGNVFKNmVVczQ3QzFmMWRraVdsbjRic0xmR1JSYjJqdm5rZ3NLWWlNNkVkT0pzckdLdFgwVW1yCnpUOUFoRDdtemx6Z3ZKV1NUaFQwU2x6MG95M1dkVHRwNDBWbzJOSVlzSXlFZ00vU3REaUVsck1CQW9HQkFNNjIKdU5mbndEWk9KU0xwekRJaFhoc1RQb0JKVVhiUHRhMmpBSUtrbFkzeCtOZHhNejhEQkc2QnBpYS94ekRKUEduYwoxS2x6NjZ2R3EvMXRFMENuTkZ0U0hza3JqcWtRNWFXeWNxS3VVRzZiQjhxanRBNEdOSXR0cFFET2RweUJXdGRuCnFSRlliS1c2cVFmWDdlWTdFVDFEQ3V2eC9XUDJIYUFrS1RsZ1JPQXRBb0dBT2RHbTdwZmJUbFJjOUNuL1F2NDAKYTUzTkpRQWN5V2RGb2JRditNd040VjJRQ0tGYXJKam5za2djek5vVUhoWlZwVUlEL0lBTG9OZzEranRlaGNGZQo1c0FKTlZWMXhQR3k4bEh3cGw2WmtINGJQWFg4cG4rSFpzUVZSdERHaWgveFlpZ0syUkpOOGlXOEVaYjRuQUkyCmFNVFFEV0NCak5KM1N0RkFYWnJVT2dFQ2dZQXdSOWMyZm1lVGgzNysvNUowNXZ3KzRLakpkOVp5SktOdmNkSHQKc1psSXRRcUU3Mmlqd0FXcDlkZ2oxc2trYU93V1ljQzk3MWhOSDdNT0Z3clhIL2FIbWMrYzl2SjBKSUV1eVZZagpEd20wTzd0L0plc3BGMlB3YU9TUUlxanJkdjhMcnFkN2FLYS9zckJHSjdYbC8wZlFnamNyMHJKM1JZVzM0SnBNClQyTG01UUtCZ0VLL2l4bGd4QndEcVptT09ySUU2eDlNcDdabDN0
U2RCOFVoMy84ZHhZVmtkcEFvK3pHZDg3M3cKODJaMUllcjRoK1FCa2FYdEdsNGM5V1k1YWhpSERHODhIbmJRYm9WbkdxTWJkeTNOVWIrYjZFMG1EckJ1aVhHaAowTWMzK0JUTWhnZGRTZmlmRytIUDBtdVhkdGhHSVJOTkExZ2xWd0x3OEdDOGpsODVkMkFDCi0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg== diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_dev b/dubhe-server/common-k8s/src/main/resources/kubeconfig_dev new file mode 100644 index 0000000..f544f26 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/kubeconfig_dev @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM1ekNDQWMrZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1URXlPVEE0TXpNeE4xb1hEVE14TVRFeU56QTRNek14TjFvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTmpCCmxkVWhGQkdqSDNsYXIxTW9wVUgvMW8xNVUwbEhvcGQzZjgyL0tWMlEyOU9BKzhjMkhXYTd3R01QSzhxL1JmSVAKTjlnYXJJd0FqZXFwNHFSMWhEcVhka05WN2NOZUZ2OGdoMUVYSzMyTUxoaFZJdXhkTnF6OW9GVm85RDZqTzQ5awpEOWQrM215eU4zamxsT2wyTWVRMUY0STlHSTFabjFCOG5LQVZWVjFIZTBPTGZjVTIrZG5mU0g4ZFdqV2RPQlBhCkJmbGU4UFRLQy9tOFVmcTQvanArVU92R04yVEhIaXk0V2UyTWFEK0xxWDIzekE0T1FlK3NrdlY4bHlqK0ppVUQKVVRiM2xzazlhTDMxblMxUmpnVStkbVIyR2xHQ1BHWEhNc2Z3MlVCZzljZEM5ZzhKc2dFeTJDa04xMitjeWx3dApDbHFQQ3o0MlNlNEJMNWF1ZGJFQ0F3RUFBYU5DTUVBd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0hRWURWUjBPQkJZRUZCTmFSa0hpTzdvZDd2U3pUUm9qaHFTWStSNXpNQTBHQ1NxR1NJYjMKRFFFQkN3VUFBNElCQVFDVnZGRUg0ZEYzUTQrRDlOeHBZdDM4ZG9mb0ExYjlYOWNndU5YYktvbGxHK1lQeWl4NwpBcGlMWkp2WFlsbk0rUU5BYjMyOU84VmNkQUp0aDZ4MjVHSVNUVjFGbXBWWHFlRVIvUFJOZURsaHFybzVwYUlVCkV1YUQzYldxaitWb2p6b1AyVzZMU21RdzQwNUIzV0JKRlJtdm51bW9LYStyUWEySVoralRNMVQ5bDV5bXR2YloKS0lXNGZFZ0N2VEhPSmtRLzBGWDg3dEc0VlVJZ3hHRXZJU2lUUkJYc3hnQXdVK0I5SnN4bEZSS3NHYTJ4QkE2bQprZXR1VFVlZkswTGZRMFlMWVVKSVA5Y0RsNm5QSld5U0NTcWFZUGRoaGxERWxEYUt4d2JZeHdBVkI0ZzV1cWhNCnZrMkN0RVFHOWg0b2ZxZjQ4VDFLVlJVRGo5UzdBL2xtbEZxNgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
 + server: https://10.5.26.91:6443 + name: kubernetes +contexts: +- context: + cluster: kubernetes + user: kubernetes-admin + name: kubernetes-admin@kubernetes +current-context: kubernetes-admin@kubernetes +kind: Config +preferences: {} +users: +- name: kubernetes-admin + user: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURFekNDQWZ1Z0F3SUJBZ0lJT21TandKODdaUWt3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRFeE1qa3dPRE16TVRkYUZ3MHlNakV4TWprd09ETXpNVGhhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXdSWTJuYkJZc25KMWxlQ1MKS1lxbCtLSlBoOW5UdkxqanNRczJobEJrcmM0M1o5eUNEUTM5V1RWSkZHdHZzUDVQK1JNdzFOYUY5QlJUbTQ2dQpOQXBYTGt2bTRLZm1mL2taVUh5clpLYTllSnlkbTRWZTNjNWVWeXpidHRVUVgxQUNFOGZIM0VqY21hRHhPZGViCjBHa3dHc2FnVVJiQ1JxSWpZYmQ3V3BkK3F1OGRuUC9TbmJkaURuaU9sNVVoSlhMUEl6TmlLZTZnREt3NUN6VGgKbVVjZnc1azB4WFpwRFBIckwxOG4yMVB0dlRZSEhxYXo0ckx5MWZiWWsyOG54cXZEaHYzUVBXVldRNGhVR25tZQpNRXZlcTRuNU1sRGJOU2tZUHhaSDNUbUhrZldXR1Z2dGI1RDFLeStSbnFISUxNZEIvdHN3YisyaDVBQTE0WXJICjhBbnZ5UUlEQVFBQm8wZ3dSakFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0h3WURWUjBqQkJnd0ZvQVVFMXBHUWVJN3VoM3U5TE5OR2lPR3BKajVIbk13RFFZSktvWklodmNOQVFFTApCUUFEZ2dFQkFNdVZaem5pdlhXeDV1dHl5cGppcHJQWkdxeXVYb1J1NVR0eEZwMVJFcHVIb0Z1NHg4VkNUYkplCnNzNmN2VitORmdIa3BLSHlWb2Y0YStRNHlOcGZmMWcrL3d3a05XcHlBQTZBUWdCbW00djBIV1ZVTzYwVVN0VEcKckFhQVUyZ1R6dGYvWll3RFpreFdIUmo0V3ZIRjZRaDhUYUdFVTdaL1dWbC9rOW82MkJGNGRza1pCRVZmcDFvMApMSkorcGxmdXpLOTNwN0t4ME9hSUhHUFRPcnBydEc1ZmR1U2NmRDFUck1nOE1zbmIyRG5XTHVXNzZPUi92cVRBCmVaRFI0Nis5aTllKytQMUh0eDZZMFJNVVpuS1o0MCtIc3dFUjZFZHFINmdJd3Zia2FvSXQydmF2Y2dxdXQ2bWUKZ1VoaVlYelljREhmRllFOUJVWU5UVm1zdmxNOSt1RT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBd1JZMm5iQllzbkoxbGVDU0tZcWwrS0pQaDluVHZMampzUXMyaGxCa3JjNDNaOXlDCkRRMzlXVFZKRkd0dnNQNVArUk13MU5hRjlCUlRtNDZ1TkFwWExrdm00S2ZtZi9rWlVIeXJaS2E5ZUp5ZG00VmUKM2M1ZVZ5emJ0dFVRWDFBQ0U4ZkgzRWpjbWFEeE9kZWIwR2t3R3NhZ1VSYkNScUlqWWJkN1dwZCtxdThkblAvUwpuYmRpRG5pT2w1VWhKWExQSXpOaUtlNmdES3c1Q3pUaG1VY2Z3NWsweFhacERQSHJMMThuMjFQdHZUWUhIcWF6CjRyTHkxZmJZazI4bnhxdkRodjNRUFdWV1E0aFVHbm1lTUV2ZXE0bjVNbERiTlNrWVB4WkgzVG1Ia2ZXV0dWdnQKYjVEMUt5K1JucUhJTE1kQi90c3diKzJoNUFBMTRZckg4QW52eVFJREFRQUJBb0lCQVFDQVA4cmpEbUM3bGo3Mwo0QVAxUjd4WjJ5Vk1MS2p3N3pWOVFOOWtjejJNSEM2ODg4QUYvOGJKWEJWQVZPUUpBY0lJeEhCb3pseUc5SUZjCkQzY3g1YlJtY2MvV1pHODdqUzc5UVBGdWx6bStSUGlDZGlHKzNmWFhuVm9LcEphTldFMG82a2ViejJHSFFWazQKZkRkb1JKWEpFeWtrOHlMelczcDR5ekJKeWRvK21oQmhZWjRCd2xwRVExeURQU0xoTnpSR0NRL0tUTm94T1ZQTQppcVJUdkR2MGhZMTFLdGk1dExMbk1RWnIvaDJRNTliT3VaQVg0NDFiWVJjSzNyU2R0TzFBSGhYVmpSMDlZWFZUCnJwbE44VkhGeE1wZjVaNHFRTW5idk1yb1B0cU5PTzlqWCtrWjQrY2p2UjlQbzNaK1Y5QU5tUHR1bEtMejhFbjYKcUZSYTVTc3BBb0dCQU0vTStvU2t1QVpsT1ZXR1JsdkNlbE0vMHJWUDFrRTdSeHl2NkZDT2EySTlFcTEvUThraQo0N05saHZ2ei9TcmZia040Ymg2Z1RXOUc3WGVpZGtWWEhiaVRmZGlNcXA4YzJ5QTByQVJrWGtZNStycUpZRWlGCmFpOE1jUUVhKzRObzdHOC9xSk5hZTFtVzBGNE5JUTFoRlRMNHMxTnJ5Z0pIaUxDeHhIdVg4cjVUQW9HQkFPM2YKaU9uSm5qY3IrTmltbytGVXYvaHp5SjMzQ2FjNWNjakF5bk0vUGpheGRhOXk4RW1mL2x2eURDQmsranRydlV3NQo4eG1lMHc3UXZJNzlZZkljc2JrYTRnb1dtREIySXZXeGlJRkZCcWwzOUd1eFRYVUpCb2R6MXF4TFJWL2tSekZYCkhIZFM2VTVVTDJINkRQMmo2aVk3NCtWOHhSVlBhQnpPRnQrdDlMM3pBb0dBVjZMVDJGTDdoVDcxWWh0QjJ2cjIKeWRzMXl0K0R0WGtCY1pqQ09nOEdQS1VURm5hQXFQZjgzYkNvcitOalZzeHAzU3lTREhxQWpiaDA5NnBkaExyTAp2d2I4NFBIYUYwWjlKMlR2VXQ3ZFgzS2VTa01iaHhvMUFPaVhVK2NFUVpSamVqdDNwY1ZZRmgxamZQYStoTHozCnlzcFdIbzFyNFhBM3RvVVNIeW1pNXg4Q2dZQlBmZTBXRzU5NDFvUUJlcWEzYllmOXNrdXZwbGVlZjQ3cnhLbWoKRzlaaGxxbzZhSkd1T1A1YW9hZTF4NmpyNUpSTS9leWtlalFUOW1PYzhVWWpENzNBbXlCQXVNNUJHNm1SSzdQdgpaeTdUd1ZkNENiZFZ6aDlWY3J1aHRrSEh5dFNUdzRXcXhwVmE3TDBzNDlBbGp2cDNybm41UGRucXhpK0h1TU5pCkdwTDRTd0tCZ1FET0hRZ1ZGeGV0eU5zVjRkSXd6UThzbVp2ZHF1
QzRzRXZTakx4UXd6ZmZadU1aMHFBc1R6VkUKYkE2dk01WlNpQ3A5SUxTd3VKTUc1eGhqMXZUdGVDOHBCSmdsM3BmM1ArS0pXeGZ4S3lYRCtRai9STDN6V2lxcwpFaDd4SFN6UlFCcDJESjhqMWdtakd4em5IbnlDdnBNdFJlQzBzcGp2N0JVZzNqaFR4SVRRdFE9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo= diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_pre b/dubhe-server/common-k8s/src/main/resources/kubeconfig_pre new file mode 100644 index 0000000..ce15511 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/kubeconfig_pre @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUN5RENDQWJDZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1EY3hNekEzTlRNek5sb1hEVE14TURjeE1UQTNOVE16Tmxvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTEZlCmp5KzlwTjNybnJpYTlpbDhzOEp0WWZoekVMUU1LbXVmb0Y0VEhjTnFObGtOVDJiL2pTcUUzbGdRYTR2Zm9oL1AKUFVOaWVkZEZvczFoRGNxRnF0QmRpaXJ5QXhSNERPeTJsMWpjSWVpVHJNaHYyd1JZc09ZMG4yanZITFdGdENCaQphY0lEeXFmNmhHM01SV2ZNUEhPVVdDQ0JIUmxLRFlnYkNDZXp5NWxxUEF4QXpmcUpEalUxWkNUVmNNa2Z3R1FDCmFSYVZ4aTFiRHhRSk9Ud2dTTGlJdEJIUGhrdGtOcnY1MU1XZzRSWkVUbWF3azg5Y09oWExpSEg3ZExFZjFvK0oKVE80Q3J1K2pyb3lIRkNvQmo0bGpKZHd4N0ZRS25XZFpEbWZiOFFoZEdyTkRuK21WR3BRNnZSRGN5YzloSVJsMwpyM3U4bkM2TS9mZ29UTHhWaTFFQ0F3RUFBYU1qTUNFd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFIbDZMYWpscFZLMk9PbG1JYzRVK3A4bTZoVGEKd2NCTW5MZjVJcExwME9CRmZ6M1hvVi9EVUlmTE1LU2k2bzY3a1Y0aFYzNDZMTnErU244elBUZEZkbDdjZUlvbQp2azc3YXE4c2VERk5xbTJMSytSUldmUWZQbFQrSGhiWFVpS1lOUUhmanlXdjh6QzFGaWpBQTBqaGVjWVBaSnU4ClpUb2JaenhPaS8yOGNwSmNOSFFDSW14OG16UkU2c3JTRjFsbE4yaWF0eTQ1cWhEZWwyZHpmSldaeTFzTXVwc2YKaTc4Z25qdlhFU3BYaGoweGZNbEdMNnA3QkVjdUw1YkgyN3Y3djJjelh2WVJnbk0vamhscGVzbVRsNzFiSE1KQQpnSzlpR2pqMHhoZ1Jnb3h2VHNQNVZLSUszTFZVbGxNNXJhNXYxTjdOOXpmQVZybnprRGRoaXNZQzZCUT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + server: https://10.105.0.29:6443 + name: 
kubernetes +contexts: +- context: + cluster: kubernetes + user: kubernetes-admin + name: kubernetes-admin@kubernetes +current-context: kubernetes-admin@kubernetes +kind: Config +preferences: {} +users: +- name: kubernetes-admin + user: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM4akNDQWRxZ0F3SUJBZ0lJVjB5b2I4RnFSdGt3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRBM01UTXdOelV6TXpaYUZ3MHlNakEzTVRNd056VXpNemhhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXVPN0M2OWNlUjVVbmZuUDIKREZNR1M2RzZ2RkYxLzNxSU85ZVFTR0pZRU15dDhHWmo4QWNlbnMrQTNDS05jRHJ4dk5oQlpvazlPa2w4SnpzRQpZOFhKMTFmZVdPS0Zwb0dYVGtseHBuakcvRWVwbWh1d1JodUZhaTlsZ1BXQnUyei9JOG5meTdjSUhscW1EYW1UCkViaEJGWitSbzlPYjB5dTJ1MklhUWNUTVpPREc5eEdTRURIT1VYTkIzcG5oWGhYRmhZUTZyeUpVMkFzYy9TS0UKa1ZTN3RWY2hoVHZOSHVnWC9YQ1hwZ1dpSXJhMUZNbUNBMGMzY003NDJhNFI4QnhmKzBremNBV0ZqQUIwc2xFdQphcURSK3I2SWQyUWRyVjVBQXUra29rU1MyRnhKNzF6R3VtRnRQdlVyT1UzeXhreWFJbHZ1aFcxZEV5cHl4WjVJCkJvSUNWd0lEQVFBQm95Y3dKVEFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFLKzRlV280b0g0K1RFS3doUU9ZSFJvWXhWcEVxWDBXZmoxSQp5US9Qbm9sTXh5ZmQxTkc4VzdyeW1FdWJZbEtIV3JjOTNKdHJBOTA3N01BRHMrWGcvWVZrbFdpVnF2b2Eva2xOCm9aY1pVc3Q2VEdMNVJ6QmE5NzFheCt3Q09xMXRiSng0ZGtwSDdmWVVWeXVaTUg0MXhxL1hIZEwzSnV3RzFSbU8KQnEvZWdmY3d0QzdhZGthRU1SbWxlRCtzelVBZkQxYjFmd29JNnpyV2ZyV0JnUTUwTmhFUzk0NldLOER1QU84cQovalFXRlVtd3M3Nzg0d3ZPR0JjTGpSeWNvWGpxSDZXeW5hSE9BRVdCakJaMUtyYjh3LzNucnNucTArOTdtbG1oCi9IMkpNQ1lIbDRZQi9Bb2pXUnhpVE50b3BsM0k1Sll4Vlkza1J1ZWF2c0pQNElmNmJ1MD0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBdU83QzY5Y2VSNVVuZm5QMkRGTUdTNkc2dkZGMS8zcUlPOWVRU0dKWUVNeXQ4R1pqCjhBY2VucytBM0NLTmNEcnh2TmhCWm9rOU9rbDhKenNFWThYSjExZmVXT0tGcG9HWFRrbHhwbmpHL0VlcG1odXcKUmh1RmFpOWxnUFdCdTJ6L0k4bmZ5N2NJSGxxbURhbVRFYmhCRlorUm85T2IweXUydTJJYVFjVE1aT0RHOXhHUwpFREhPVVhOQjNwbmhYaFhGaFlRNnJ5SlUyQXNjL1NLRWtWUzd0VmNoaFR2Tkh1Z1gvWENYcGdXaUlyYTFGTW1DCkEwYzNjTTc0MmE0UjhCeGYrMGt6Y0FXRmpBQjBzbEV1YXFEUityNklkMlFkclY1QUF1K2tva1NTMkZ4SjcxekcKdW1GdFB2VXJPVTN5eGt5YUlsdnVoVzFkRXlweXhaNUlCb0lDVndJREFRQUJBb0lCQVFDaC9SSmtmdlFaQTcrcQpkbXpwOHJlcS9DbVQxMDhpei9RUlp3c05QSWVqZjRaRTg0dEtyeEhWVGpHem9kaCtuRU12aGNZVHlOY0cvV054CkFiTWdxaG5aTlRDZ2J4dGU5RmpTekdadXlaQ1RYenBpc1NwQTNzNklhcWZneEN3MVBvNW1qT2dwaTFQak1zZ04KWTZKZGZTWVZpTWFMMkVuQU9hUkFrdmdvNy9lUnpBeFowNkhZeEJ5WlM2dzZrRVhpQmJQYThCeUpkd0pIRzRqLwpHSU9yZkhXRWhldkdjcTU3SmF6c3hrTzczZXBYbW5IU21NQWVKcXY0RmF2czBtSUZ4THlRU1NjSmxZZ2xXWlJuCjRmekZVM2tLYmRaak9NdWJpbGY1ODluQUZ4cVpnMzhJblU0bGpzSjRsSFVhU0M1VlZ5UTFvc2pmaGlFeFg1NVMKQmJmSFU3MnhBb0dCQU91T2pHTTlwYi9rQ282ZXhaREZNSlRqbmZMQmtEQlAxNVN6NjByUjY5cjFRQ1hwZk1CWgppT0N1aHZTSnJvRGJQYVVvUXc4QkhvV01BT3d4VU1veXpUZDYwOC8xVWRmY1F2RUtsY0dXSXpGMWJxSTllcFNpCmQ3Q3dSZ2Vxd01rUTF6SHBsYU4rWkY3MEJsZ3RJYkx3Ymd6dWhuSWlZRjVRbi82dWVMQXkzQkhyQW9HQkFNajcKZWoybklVT1JxY2lpeUZwQlVERGUwT01GYWNiYkk2VzA3RUl4cElxRWpLejZPenVmZFBYSFpLVU44QmEwRjlBUgpsSzVBdXd1STErbnFqeTJJOWV3Lzk1QTQ1NGFBYjM0WjRqekUzUm8vS1N0dytQR0xUb0RSeEZneGVhQmYvUkdlClIvZlpTK2h2VHgrYy9pQjRQYU5sOGVpQ2ZFcVFhYS9MbW5WcitncEZBb0dBYnNWN0tWUWROTzdsTkFwZjkrTnoKSkNFaDdyMnRzN3BvTTZxa05Hd2hVTGRTTWtIcGczN1hTbWxvVjJqRG9oNzNqMG91dHNpYzlNcFF5TUdzTDFuUwpmWXVLUGRvc1lhbFg1WWhId21CN0xrLzk5ZGVaWkhvK2ROMkFJU3pnT2Uxc2RURldTQ3N3d2lKWk5YQUx6OTBXCnM0Z2J1MktGRlBVdk9CSU4rVFlCblg4Q2dZRUFyWEdBYTZaSXFaUVNMb2gwV0pkV3llWHY1SXJ1WHVNTW4xdEUKTEZmRkJKa2hBY0lzemVadFBCR29CRnpEM2dQckxPK1BITlhWMVQxeC8zY2h1bzBnbFpJYVpnY0ZudWhGejFBdApFbjVkeE9ITytLTlU0clp5dCs3Ty84RXFra0ZrRndrK1dHRFpCaXpRM3BwUUlOdERiamh6REZGWFM4M1d0eFFCCkp1Wlk4UFVDZ1lBOFlsZnNsQ2VmZktvTVBVK3pKZWF5SUlQNTR0
M3NySkFwblNyc1BuNlNBSlB1WjU5ZkYrUTcKNGlvNk0weEw0dE5VckhPQ2ZpODJXSzlRQ29EbDRIYTJGZ0Y3YkNMTTQ1Qld5cVp5THg0b0JRYmFGNE5ybmVuTgpVUVVFbkU2V2ZyN1RFWWVBNngrOG10dXAvcDJzbTd2eWdkYngrRFB1OXYzV1JyZE1DZDBNWUE9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo= diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_prod b/dubhe-server/common-k8s/src/main/resources/kubeconfig_prod index edd6705..ee32ce4 100644 --- a/dubhe-server/common-k8s/src/main/resources/kubeconfig_prod +++ b/dubhe-server/common-k8s/src/main/resources/kubeconfig_prod @@ -1,8 +1,8 @@ apiVersion: v1 clusters: - cluster: - certificate-authority-data: - server: https://127.0.0.1:6443 + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUN5RENDQWJDZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1EVXdOekE0TURFME9Gb1hEVE14TURVd05UQTRNREUwT0Zvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTXU2CmRJYWVmOWQzYXByK3lKK3BLY0EyaU5NQXlQVWFsbFgyZHBwelprd282T0R2TW9FYmgrRWFaY0Y1aEdMVnhSWDYKbnhtcUQvVFNCWXdENFNzY1E3c0YzcWtxeWxBLzJXaTg1NTJKbGJQcXFSQm5CaEUwV211ZE9EVXZYYVF0N3BnWApzR1JKcDROcFBsd0tLMUVpZmhsdkJIMmRVWHFjZDRENmZKbWRSSWgrNEpOS3ZOL09Hait4WjNKUG5Cc0pKOUlICms4TWFsc3NuTTYvaFpna0tKVlplc2YvcVorN2I3dXpJSVJteEd3L0RBcmtNaGgvL1VCZUUzVEFsd1lWWisxZWkKU293eldEN3EzYzFhU3NJYkdrbXJWaXNQcVVZTk9sUEplcHJLTVFJRUJKVEdPWUYzSzk0eHNKaEtBMkI0Z0VCRgpOdkJxTzVqZFZ4RkM0SFdxM1VFQ0F3RUFBYU1qTUNFd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFLV254YlZqTFRWZVVyZDB0Q004SE83ejk2QnQKQTBGS3pmclRSTkhtT0I2WVVSRnVRbWZJdE9GTkY4QnJoYVBCZVNKMFZrNVdNUXZBd1BkdnY2R2l1NU1VNU45TApHV3R2eXhsS0Z5aVkxR25RUy9sWjRjR1JaSE9kMmtMNFY3bVNLQmo3ZFpzcDN0dW42d3BQZWM3dUJ6Z1UvNzdxCjN1b3BGMGVzR21wY3ZFaVhNSlRrZUN1NTNhaTVFVHhSS0Q0V0xxUUFhbDViUlB4b0UwL3Mrams5SGI4b1JuNnQKa0RDaElpQnVjL3RDaHhwTmNFVWt6UUwyRjBHR2hRaVlQTFFQdkgzRml5S2tlWjBiYUNHdFdpODEvZG9lVVJKZQpOM21jU3pvZEM1SUhMc1Zta05HTzh3Y2pocjVZck0yVGxHS01sV1huRVk5QWFMUEl1blI0V
mQ4ZU1iWT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + server: https://10.105.0.1:6443 name: kubernetes contexts: - context: @@ -15,5 +15,5 @@ preferences: {} users: - name: kubernetes-admin user: - client-certificate-data: - client-key-data: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM4akNDQWRxZ0F3SUJBZ0lJSDByc3FWMTBudUF3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRBMU1EY3dPREF4TkRoYUZ3MHlNakExTURjd09EQXhOVEJhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXUwVlR6ejIvK1VqbEtVRHkKL211bVYrbTBjejg1UURTUjU1aXN5QmNDTWVzbEJsSEYyVEhWQm9FRHhUMEFHQmF5ai8rdEx6c1J4Vjg4Z28vNQpkZFVZL0llNllNVmVlMVZuQ29CL2VvYStkUDBQZUlhZVl3amU3WTJhdFhpMDBOM3EzZ253Q1AvM3FodUpabDBTClQraTJHMktOQlRvbjYvSGUxRVBva0hlcTZaMU5yYm5aTWROTzBWM1VaTExzMXdhS2ZESDJHRStlang1QzU4VEQKSnVGdjN5QkFPWW5CblI0YTBObWJpTFJ2RmN3d1BFR24wamlHazZGY2oyK0RWRlVCQjYzbVFOV3puWGVlaDQzVwpodm9GQnZKUVg0OFZTeng0U2tHVURtaUU1Sktncms0T3dJNk9vR0Fsa0kvbU4wQnl2b3o4UjFFbENMY29jZzJWCkhvN1Rrd0lEQVFBQm95Y3dKVEFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFNSGpQdnNNWDUyMjNzbTZTQTJzYlBzc3NaalJJSUhCSWloUwp6MlBVR0Q2R2NXZ0RRRXBzRWVRRDYzby9vLzVLcndBbHUveGlnVW9VK2dkRTQ0S29PTmM0Z05tdHdZOVhzcnZXCmZSamc3YXh2MGN5czBuSzBCdVJyYjBQVDZ0ZkVqb25yQXUvQ1NuSG9LZERuaVBweUNQSys5amZONGpJR1VobFkKU0Z4Qnh6N3ZGQlUrRlVZbjBYR3BHV3FnQjd6bVRwRllSVXNKOVc5eXJlaWlsZDVJcDdzSGljZG0za0NiYlJWagpmQ1k0eVV5elVSbFV2Q2xrOE8zQktEQkYxeWZIUGErUW1STStabzhEK3V2T2VHVVFqSW92eHdXNVNPM1RkNkE5Clk2WEoxbEpwRmtUWndrMk13N1N0RjhxK3MwMGFnNFB0RGJhKzNoM2FmWG1DWU9FOG5pUT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBdTBWVHp6Mi8rVWpsS1VEeS9tdW1WK20wY3o4NVFEU1I1NWlzeUJjQ01lc2xCbEhGCjJUSFZCb0VEeFQwQUdCYXlqLyt0THpzUnhWODhnby81ZGRVWS9JZTZZTVZlZTFWbkNvQi9lb2ErZFAwUGVJYWUKWXdqZTdZMmF0WGkwME4zcTNnbndDUC8zcWh1SlpsMFNUK2kyRzJLTkJUb242L0hlMUVQb2tIZXE2WjFOcmJuWgpNZE5PMFYzVVpMTHMxd2FLZkRIMkdFK2VqeDVDNThUREp1RnYzeUJBT1luQm5SNGEwTm1iaUxSdkZjd3dQRUduCjBqaUdrNkZjajIrRFZGVUJCNjNtUU5Xem5YZWVoNDNXaHZvRkJ2SlFYNDhWU3p4NFNrR1VEbWlFNUpLZ3JrNE8Kd0k2T29HQWxrSS9tTjBCeXZvejhSMUVsQ0xjb2NnMlZIbzdUa3dJREFRQUJBb0lCQUVYR0pOM1lZZ2lkY2xTVwprSExlNVJGb1VBV0lqdW92TEJXZ092QXFNblVxNlphYkxSNHBoUGR4WmxnOHpDWXRmc1pNT3RpWUo1emtTUVZVClkxdlYxQU56QnF3N25XSlNoWnZTR0swc094WVhtNFlLa2tUUDcwK1BMUTlrTStxR1pKWHFHZmNnZDhSM2toQUQKcVdrQWlhbFdaTGlIM0l2NmlFMktKOEo3ODhBcWFrM1liemJFR0xvUkxKbzZONjZzSm9CcHRUS3BOOENpcjF0bQozR1MvRllZK2ZzcVFudU8vWkNjOUl1ck5scVJkU0ZNdThRT2pRMFp6TkJqMVVEZFBNa24vWEhWN3BIQWF1NkVrCm5lLzlrSUNWMWxMZ2FuK0lLYW1kVElvZk5aVUhsL3k5QksvZ1U0SFRVMGo1Sit5WkV0bXI2VktmeGloQks5aWcKelZRM1R0RUNnWUVBNnh0b0RRd2dFRmNSdEJ3WXZQZWR5L0FZTjlwc0ZjaDhGRFJJUXlxUlhUWUVURlRjaXFNQwpwMHYyaXY2dHdoWUo1QmZVcHBJUU1kK2c3NjFiZHdqTHpweEJKZHpzakwySE5HMzhaeWxZTXc1LzBIT1FOTHFoCmt2VW9rMVJHQkxhOGFuOFA2eGdvNXptMHY3SW11MGs4RVQ0bjI5aDJJclVVSm1pWkZMM3dBMThDZ1lFQXkrbXEKUUR3NktJc1lDTU44NnBzSXAvQU5zTWErVlJ5cFJRMWYzQStSSjdnNkhYRGxqOVlLQWIxSEdobnMwKy80ZzRrRQpoTUczZmVNM2hoc281aVhCNjRPS3RkQkFodVA1UkF0RDU0eDY2MHhtYnZqY3YyVFJFS0pLMGs5SXBSU1ppa2pGCmVVTG1rbzZnUmU1aVI5NU5NQ0o5NFRBQW80SXNEZjJxQ0pqRU1FMENnWUJFRjZMeUxISFk2YTdKOEYxRjFaMlIKSkUrUFZhWjZSSitUSm5WTFpyZkZQRkRRWHIrbE00TWdPd01EekxFOGhpK0ZMVlc4akk4K01wdWs3eHVQaFMrcAovbDFyL3VsUDlkQ0Q0ZHI1Y2VNR25vdHNMeHd6K1YyMGQyYXlEUFZlaGlKWjRjVVZmT0RUMzBXM1EzeXVQNDZ6Ckc4SmxqUExpS0huV2lmTFVMQktvbHdLQmdBeU45Mmg3RE0yZ09ydVhaYUtBSnhsSDQxL2w3S1FLM3JVY3JMRTgKMkNBTTdLOFJXMkR1dWJEL3VWZjNEcWpCMXBncW9IZVlBYmNqZlRDcGpXd1dHUWxxRU9rK3lDcEY4UHZxZ2FUSQo5bXgwU0w0K1hrRCtjUXpJSVRrdm5uWmpmVXlSVEc4NTJqNWR2NnB1a2VpQTNGbkJWZVMrY3R1ZVVSNFBaeCtlCllEM2xBb0dBRWRId3VVa2ZBYWZJMFdkOXdlYXNDb1daTUlDTm1M
K1ZDcXEzMEZyamVFMXg0cElTNjdFODJkU2UKKy9TbDNkWXpucHkxODJVdGJQNzlSYThoNXloOUVCTzVYMXlweGlCeCt3MmdwNWVJWG00aStOUWZxbjNOUm42awo2VW1RY1hCMXV4djNKWENuYWxVUk5tVWdudEFzaEQxaGd0Y1VvSzArS0NXT3VhUnFqSTQ9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg== diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_test b/dubhe-server/common-k8s/src/main/resources/kubeconfig_test new file mode 100644 index 0000000..bbd3599 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/kubeconfig_test @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM1ekNDQWMrZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1USXdPREE0TWpnd00xb1hEVE14TVRJd05qQTRNamd3TTFvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTWVKCkRjTThQUHFlV0tHazE3S2JtN3kwbzkzTGtxb2w5dG1QbVN0YTVyUXZSWU9rOFJYK2QvVEpVaXE2S2c4NXFyMGUKZFZ3SG5vTjg3Q0VFbXhOK2o4OUhlcnY1dDFaRFBHMVRHS3U5N1l1WnV4RCt2R1hGQkhDZ2ViT0s0Zmc0am1iSgpwUmlJeCtUZmlyUjM4TlVhZTBkR1g5c1o1TWF2NkhiSVVEK2hNTm9VU2pyWUZ2bTcxbGtYdlhVWW9wd1FSR3FvCkZiWG5KTFFVakd3UFFRclRCQ0wycFE2c1dGbkpkQ0VBSHhXU0grTUN4cXV0L25SUHhORlFOWGFMZmU3clB3a3MKd3NrVlFlTzgyL205aFZkWnNnNUFnRDJtTS9jS2VybE9acVBGcWFEaFBoYnpkRzF4QjRtcVZmTGJVUUppa3I1SQpYQ1FhTWR2c2FRTnhPcmNPOHQwQ0F3RUFBYU5DTUVBd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0hRWURWUjBPQkJZRUZFek92OGhVUGtRYS90bGNhQTE0OERmcmF5TXRNQTBHQ1NxR1NJYjMKRFFFQkN3VUFBNElCQVFBMTFqcDR4V1hyZkRPNDJlblBpdm9HNXZxY3hqbysxaktZV1YxR1NIdHNVdlBNU1UzVAptdVFCQWU2VVRzM0JtSTgzMUQ4aFd4cDlka3BkMlhWbCt6OTZabFIvQ2Qrb3NSWWE1MUpZd2RvMUp4Z29hbUk2CmszOS9sNTRBQVpWMkFPWWNQU0pKNkNzdVcxZ2NTMDlaTlA0djlhK0NIKzY5OVpQMGlhUStEc0FPRFJzUnhRcVMKK08zNUU4K0RzRjBSOWZCYlhvbkxiK05VSHlTQ2pQSnVVT21lS0RYMFpQQm81blNscmh5cUIxbHVNZnhEaUltdgpIU3BscXB4M1duMzJnNEZYOCs3clZBNUZjWkIvNDg3bUZmdG1CTXc3aENidlFJeWY2Ymd6VTUycUdaeFZGTTVjCnUxeEZyRXlXaFA1eElJY09lY1M1RCtJa29ZOHUyT3ozQldEeQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tC
g== + server: https://10.105.0.6:6443 + name: kubernetes +contexts: +- context: + cluster: kubernetes + user: kubernetes-admin + name: kubernetes-admin@kubernetes +current-context: kubernetes-admin@kubernetes +kind: Config +preferences: {} +users: +- name: kubernetes-admin + user: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURFekNDQWZ1Z0F3SUJBZ0lJQjUxN3hkT3V2cnd3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRFeU1EZ3dPREk0TUROYUZ3MHlNakV5TURnd09ESTRNRFZhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXlFaURrYVJ1WmVUbVJ4WmsKRkxUbkRPMUxIM1k5dE80WXpXREt4QVA2bFphTUJwZXJWRWpiU1VMOGIzdG9WWFppVnVnaHhvMzRHdjF4dlJKLwpDN3NhWGVaU3EzcTVMcEFhQjBrelRDNFRtbGtrdE03YkJTZFNhMG9SeGJXTUdvVEM0WUVBckJCNS9FRmdUYjRCCnE4aG1USUlmdUcwa05sNmd5a2M4UnhSL3N5Q1lSVjZCTTZTYWFzODR1Tmx3ZU9vV2RaQkQ0UVZQbzJueUI4NGQKUnc0d0w2VEQ5MlUwMG5KcjY3bUtpTjhBNGtBampFbWNrNm1JbHdsUTB2MHR5Uk8zTFBXZFdNazZ5QVIyVEZ6RgpBZCs4dUd5L3VDQlB6UDlVcDFERkI5TDF0Y25sd1lTeTYycGdFWC9wMzl6ZjZ0RFpZRVJwK0Jpd21Eb3RmVmFaClNLam9md0lEQVFBQm8wZ3dSakFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0h3WURWUjBqQkJnd0ZvQVVUTTYveUZRK1JCcisyVnhvRFhqd04rdHJJeTB3RFFZSktvWklodmNOQVFFTApCUUFEZ2dFQkFCQ01US2VEV1dHa0VFZWp6dlN3TTFKZit0QUJaYklrd0FoZEt1MjFnUkxyQktzbHQ4Y0NHMG1KCmc3eDhndFJYcW0xRGlEU01DMnlWMW9OWTFSdGlLZVhZT29RbVY2SFh2bEQybWZyY2VzS3VSNHlIbCtlWHcyL1AKb2FLZVFEM21yVm9ZZ1ZMMVNqOWpzcDRBa1hEM1ErUzdKYXN4Y3MvVkR3SmlRQzhGamkzaWZMVFFKQkxCdm1WVwpCZ3QvM0xPZ1lCdWxYRWg3cFlQUzduTVFqNFpxQ0QwWTNJOENicUM0VGlZQjB1aTZlbVRDd3cvRnBNSzNIRFZ4CnZOd3FPRHBaWW5DK0IxSzR5RUk4NGIvM1o3ZXA0MUE1NGk3QkpxTklORUVtRjdGcmpQQy91V1RuYlpzVUNFQjAKWTFUR1Y3czNBUTZMZXNCRFF6UVRzVnBENWFRRk9VYz0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBeUVpRGthUnVaZVRtUnhaa0ZMVG5ETzFMSDNZOXRPNFl6V0RLeEFQNmxaYU1CcGVyClZFamJTVUw4YjN0b1ZYWmlWdWdoeG8zNEd2MXh2UkovQzdzYVhlWlNxM3E1THBBYUIwa3pUQzRUbWxra3RNN2IKQlNkU2Ewb1J4YldNR29UQzRZRUFyQkI1L0VGZ1RiNEJxOGhtVElJZnVHMGtObDZneWtjOFJ4Ui9zeUNZUlY2QgpNNlNhYXM4NHVObHdlT29XZFpCRDRRVlBvMm55Qjg0ZFJ3NHdMNlREOTJVMDBuSnI2N21LaU44QTRrQWpqRW1jCms2bUlsd2xRMHYwdHlSTzNMUFdkV01rNnlBUjJURnpGQWQrOHVHeS91Q0JQelA5VXAxREZCOUwxdGNubHdZU3kKNjJwZ0VYL3AzOXpmNnREWllFUnArQml3bURvdGZWYVpTS2pvZndJREFRQUJBb0lCQUgrbHY3ME9TSkpHZmdHbQpvcWlUMTRKa3BuRnA0ZEF6dzdqNXpLRjdTN1VWR3krRWNOeXFCcUM5d2JlbnRvcHBoaW1Qang5R0VtL1pRaWxYCjVZTHJmOVdDMndPUmx2NjNOdStYMXNyaHZ2cXJmL3FBc0JTcnlCcTdQWEo5ejhxQy9OWE9hMGcreEJCaTltYjIKQjRpZGs1MkZmWVFFZzRUbmNLRWJINjdKd00yL2dmZ3BXMGNEd0RiNVc2aHIxclUzNkNGc1AyVTlEYytMVkV5cQpvK3FZZ0RFZ0MxSWxucGVIdzROejJmK3RkbFpLVHN5dzV1ZG5xVm1kWGNkLyszR3NvQVM3Q25DTmQ3a1pKQ1BPClNpRktZSDZZUS93ZmJLZjFibC84eERXU1UvKytQUTZyODJGV21mMFBjd3ExbWV5OXIvUW1kaHc5YndiamkxVDQKdURjSzUva0NnWUVBOURIZi8xZnN4czlqVldoaE94UXFzUFN3UzdDbVZYcHV2S2Q3MjlMRGFIV1VQT3E5Y0l4QQpaV01Ibi9EbEN3U2dxaitPL0FNSDdaQVpPbDV5V2RiZFVEM2I0WWVKLzlubXdvN25ReG9ndTlUdTlNZEVnM0pvCm1wTzVqQTYzYTE2Z0sxRWlmTDJkdllRQkJGQzg2aDJzYlV0WHNGRDV6R3QzTU96bVF6SFFQM3NDZ1lFQTBmY3gKMzlUR0YrZFdVb3MzOXRLaE9MZVpFaWtUSmtqckdrTUdldHpYNEVCRE5oRUUvdWNkYUZCWmJoU3ltZHAwMmtKNgpKQkpvaEJRaEN4Zk9nUXhRdFp2dUt3WUMvTEYrMVF1TG1MaERGV2NEa05Ma1E0RWU5WVRFV1VuNE9NNkJJTmVOCldseklzcncyeHRvaUVoMXZFWW92ODdobEZCQ0E4N0xYSElpa1NjMENnWUFnZ1pNajFueDZhcGo2Z1k2UDRydGMKR3ZaczNQTUhaZWpmekJ1OHcyMm50aDhwak1YeStYaUpCb201VE1Qd2w0a2JvS0pVQWNOSWFHb3pUdWRCOWt3Mwpwa1JpM1R3Tmh1QWsvZ2ppOGROeHJZS3hxdVQvNm5icVBNZnlVUE14bzhNR2dTSnFJSU9pK3JOMzg1ZHlRc2oxCk01WlVyakxTZDJ2Q1k0YXpZeW9waVFLQmdBS0RBb1c2OWVPelVNVU1CVTllbGE4b0F4ekVnUVlrT1N2SFFYeVcKeDc1WEJuYlBIVkF2VTVxNzIxWUZ5VjB0ejlnTWs0bDY3dVVsbUgrWmVVN3g1c1ZGYUVQN2ZtMm5jZXo2aG1EOQpVMUFlTzF5d0tTcmxrSWsvWFZuMEdKUVZaRllRUGhDbXEzLzM0Ry9nakNmTFVsalRYbDk4QlRtSU9RS3hVUUYyCllRREJBb0dBRTB5Q0NzcFE3Y1lYaVFyQnF4VVkxU2N5SjViVm1i
aFN5cmgyb3FiUmRMRmxzWHFjUnBIK2tORjYKdXdjUzhZalJodmdQazgyZm1QcXlieE5ZWGxTYWoydGdzSFdpcGNETXZSM1dtSlQzTGpjeXZObDh6cUlkZDNBdQpJT1pNeG0vWWVMemNFOUdteDRsQkhmMnU5MkhiRjRiSCtqeVp3d3RWcHpRdmphaFF6dGs9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg== diff --git a/dubhe-server/common-k8s/src/main/resources/mapper/K8sGpuConfigMapper.xml b/dubhe-server/common-k8s/src/main/resources/mapper/K8sGpuConfigMapper.xml new file mode 100644 index 0000000..cea9870 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/mapper/K8sGpuConfigMapper.xml @@ -0,0 +1,12 @@ + + + + + + + insert into k8s_gpu_config (namespace,gpu_type,gpu_model,k8s_label_key,gpu_limit) values + + (#{item.namespace}, #{item.gpuType}, #{item.gpuModel}, #{item.k8sLabelKey}, #{item.gpuLimit}) + + + \ No newline at end of file diff --git a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/config/RecycleConfig.java b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/config/RecycleConfig.java index b698b81..56cb46a 100644 --- a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/config/RecycleConfig.java +++ b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/config/RecycleConfig.java @@ -68,5 +68,9 @@ public class RecycleConfig { * 回收serving相关文件后,回收文件最大有效时长,以天为单位 */ private Integer servingValid; + /** + * 用户删除tadl算法版本文件后,文件最大有效时长,以天为单位 + */ + private Integer tadlValid; } \ No newline at end of file diff --git a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleModuleEnum.java b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleModuleEnum.java index 11cb250..5fbf13f 100644 --- a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleModuleEnum.java +++ b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleModuleEnum.java @@ -39,7 +39,8 @@ public enum RecycleModuleEnum { BIZ_MODEL(7, "模型管理",SERVER_MODEL), BIZ_DATAMEDICINE(8, "医学影像",SERVER_DATA_DCM), BIZ_MEASURE(9, "度量管理",SERVER_MEASURE), - 
BIZ_SERVING(10, "云端Serving", SERVER_SERVING); + BIZ_SERVING(10, "云端部署", SERVER_SERVING), + BIZ_TADL(11,"自动机器学习",SERVER_TADL); private Integer value; diff --git a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleResourceEnum.java b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleResourceEnum.java index 5358662..6915e28 100644 --- a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleResourceEnum.java +++ b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/enums/RecycleResourceEnum.java @@ -51,6 +51,15 @@ public enum RecycleResourceEnum { BATCH_SERVING_RECYCLE_FILE("batchServingRecycleFile", "云端Serving批量服务文件回收"), /** + * tadl算法文件回收 + */ + TADL_ALGORITHM_RECYCLE_FILE("tadlAlgorithmRecycleFile", "tadl算法文件回收"), + /** + * tadl实验文件回收 + */ + TADL_EXPERIMENT_RECYCLE_FILE("tadlExperimentRecycleFile","tadl实验文件回收"), + + /** * 标签组文件回收 */ LABEL_GROUP_RECYCLE_FILE("labelGroupRecycleFile", "标签组文件回收"), diff --git a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/utils/RecycleTool.java b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/utils/RecycleTool.java index 466ff38..cf2a88c 100644 --- a/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/utils/RecycleTool.java +++ b/dubhe-server/common-recycle/src/main/java/org/dubhe/recycle/utils/RecycleTool.java @@ -197,7 +197,7 @@ public class RecycleTool { if (sourcePath.length() > nfsBucket.length()) { String emptyDir = recycleFileTmpPath + randomPath + StrUtil.SLASH; LogUtil.info(LogEnum.GARBAGE_RECYCLE, "recycle task sourcePath:{},emptyDir:{}", sourcePath, emptyDir); - process = Runtime.getRuntime().exec(new String[]{"/bin/sh", "-c", String.format(ShellFileStoreApiImpl.DEL_COMMAND, userName, ip, emptyDir, emptyDir, sourcePath, emptyDir, sourcePath)}); + process = Runtime.getRuntime().exec(new String[]{"/bin/sh", "-c", String.format(ShellFileStoreApiImpl.DEL_COMMAND, emptyDir, emptyDir, sourcePath, emptyDir, sourcePath)}); } 
return processRecycle(process); diff --git a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/async/TrainAlgorithmUploadAsync.java b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/async/TrainAlgorithmUploadAsync.java index 9f66d51..b21dff8 100644 --- a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/async/TrainAlgorithmUploadAsync.java +++ b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/async/TrainAlgorithmUploadAsync.java @@ -22,7 +22,6 @@ import org.dubhe.algorithm.dao.PtTrainAlgorithmMapper; import org.dubhe.algorithm.domain.dto.PtTrainAlgorithmCreateDTO; import org.dubhe.algorithm.domain.entity.PtTrainAlgorithm; import org.dubhe.biz.base.context.UserContext; -import org.dubhe.biz.base.dto.NoteBookAlgorithmUpdateDTO; import org.dubhe.biz.base.enums.AlgorithmStatusEnum; import org.dubhe.biz.base.exception.BusinessException; import org.dubhe.biz.file.api.FileStoreApi; @@ -35,7 +34,6 @@ import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Component; import javax.annotation.Resource; -import java.util.Arrays; /** * @description 异步上传算法 @@ -103,14 +101,6 @@ public class TrainAlgorithmUploadAsync { ptTrainAlgorithm.setAlgorithmStatus(AlgorithmStatusEnum.SUCCESS.getCode()); //更新fork算法新路径 trainAlgorithmMapper.updateById(ptTrainAlgorithm); - //保存算法根据notbookId更新算法id - if (trainAlgorithmCreateDTO.getNoteBookId() != null) { - LogUtil.info(LogEnum.BIZ_ALGORITHM, "Save algorithm Update algorithm ID :{} according to notBookId:{}", trainAlgorithmCreateDTO.getNoteBookId(), ptTrainAlgorithm.getId()); - NoteBookAlgorithmUpdateDTO noteBookAlgorithmUpdateDTO = new NoteBookAlgorithmUpdateDTO(); - noteBookAlgorithmUpdateDTO.setAlgorithmId(ptTrainAlgorithm.getId()); - noteBookAlgorithmUpdateDTO.setNotebookIdList(Arrays.asList(trainAlgorithmCreateDTO.getNoteBookId())); - noteBookClient.updateNoteBookAlgorithm(noteBookAlgorithmUpdateDTO); - } } else { 
ptTrainAlgorithm.setAlgorithmStatus(AlgorithmStatusEnum.FAIL.getCode()); trainAlgorithmMapper.updateById(ptTrainAlgorithm); diff --git a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/dto/PtTrainAlgorithmCreateDTO.java b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/dto/PtTrainAlgorithmCreateDTO.java index f1dc9dd..df18138 100644 --- a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/dto/PtTrainAlgorithmCreateDTO.java +++ b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/dto/PtTrainAlgorithmCreateDTO.java @@ -67,7 +67,7 @@ public class PtTrainAlgorithmCreateDTO implements Serializable { private String codeDir; @ApiModelProperty(value = "运行命令,管理员使用") - @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "运行命令-输入长度不能超过128个字符") + @Length(max = MagicNumConstant.EIGHT_THOUSAND_ONE_HUNDRED_NINETY_TWO, message = "运行命令-输入长度不能超过8192个字符") private String runCommand; @ApiModelProperty("运行参数(算法来源为我的算法时为调优参数,算法来源为预置算法时为运行参数),管理员使用") diff --git a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/vo/PtTrainAlgorithmQueryVO.java b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/vo/PtTrainAlgorithmQueryVO.java index 4593b62..a4f3171 100644 --- a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/vo/PtTrainAlgorithmQueryVO.java +++ b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/domain/vo/PtTrainAlgorithmQueryVO.java @@ -61,6 +61,7 @@ public class PtTrainAlgorithmQueryVO implements Serializable { private String runCommand; @ApiModelProperty(value = "运行参数") + @Deprecated private JSONObject runParams; @ApiModelProperty(value = "算法用途") @@ -87,6 +88,9 @@ public class PtTrainAlgorithmQueryVO implements Serializable { @ApiModelProperty(value = "创建人") private Long createUserId; + @ApiModelProperty(value = "创建人用户名") + private String createUserName; + @ApiModelProperty(value = "创建时间") private Timestamp 
createTime; diff --git a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java index d324d87..c928d80 100644 --- a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java +++ b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java @@ -17,7 +17,6 @@ package org.dubhe.algorithm.service.impl; -import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.metadata.IPage; @@ -39,13 +38,16 @@ import org.dubhe.algorithm.domain.vo.PtTrainAlgorithmQueryVO; import org.dubhe.algorithm.service.PtTrainAlgorithmService; import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.constant.NumberConstant; +import org.dubhe.biz.base.constant.SymbolConstant; import org.dubhe.biz.base.context.UserContext; import org.dubhe.biz.base.dto.*; import org.dubhe.biz.base.enums.AlgorithmSourceEnum; +import org.dubhe.biz.base.enums.AlgorithmStatusEnum; import org.dubhe.biz.base.enums.DatasetTypeEnum; import org.dubhe.biz.base.enums.ImageTypeEnum; import org.dubhe.biz.base.exception.BusinessException; import org.dubhe.biz.base.service.UserContextService; +import org.dubhe.biz.base.utils.CommandUtil; import org.dubhe.biz.base.utils.ReflectionUtils; import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.biz.base.vo.DataResponseBody; @@ -58,6 +60,7 @@ import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.biz.permission.annotation.DataPermissionMethod; import org.dubhe.biz.permission.base.BaseService; +import org.dubhe.cloud.authconfig.service.AdminClient; import org.dubhe.k8s.utils.K8sNameTool; import org.dubhe.recycle.config.RecycleConfig; 
import org.dubhe.recycle.domain.dto.RecycleCreateDTO; @@ -116,6 +119,9 @@ public class PtTrainAlgorithmServiceImpl implements PtTrainAlgorithmService { @Resource(name = "hostFileStoreApiImpl") private FileStoreApi fileStoreApi; + @Resource + private AdminClient adminClient; + public final static List FIELD_NAMES; static { @@ -176,11 +182,29 @@ public class PtTrainAlgorithmServiceImpl implements PtTrainAlgorithmService { ptTrainAlgorithmQueryDTO); throw new BusinessException("查询训练算法列表展示异常"); } + + Map<Long, String> idUserNameMap = new HashMap<>(); + List<Long> userIds = ptTrainAlgorithms.getRecords().stream().map(PtTrainAlgorithm::getCreateUserId).filter(Objects::nonNull).distinct().collect(Collectors.toList()); + if (CollectionUtils.isNotEmpty(userIds)) { + DataResponseBody<List<UserDTO>> result = adminClient.getUserList(userIds); + if (result.getData() != null) { + idUserNameMap = result.getData().stream().collect(Collectors.toMap(UserDTO::getId, UserDTO::getUsername, (o, n) -> n)); + } + } + Map<Long, String> finalIdUserNameMap = idUserNameMap; + List ptTrainAlgorithmQueryResult = ptTrainAlgorithms.getRecords().stream().map(x -> { PtTrainAlgorithmQueryVO ptTrainAlgorithmQueryVO = new PtTrainAlgorithmQueryVO(); BeanUtils.copyProperties(x, ptTrainAlgorithmQueryVO); + ptTrainAlgorithmQueryVO.setRunCommand(CommandUtil.buildPythonCommand(x.getRunCommand(), x.getRunParams())); + ptTrainAlgorithmQueryVO.setRunParams(null); //获取镜像名称与版本 getImageNameAndImageTag(x, ptTrainAlgorithmQueryVO); + // Fill in the creator's user name; only administrators receive it + if (BaseService.isAdmin(user) && x.getCreateUserId() != null) { + // Map.get returns null when the creator id is missing from the lookup map + ptTrainAlgorithmQueryVO.setCreateUserName(finalIdUserNameMap.get(x.getCreateUserId())); + } return ptTrainAlgorithmQueryVO; }).collect(Collectors.toList()); return PageUtil.toPage(page, ptTrainAlgorithmQueryResult); @@ -229,13 +253,8 @@ public class PtTrainAlgorithmServiceImpl implements PtTrainAlgorithmService { Integer countResult = ptTrainAlgorithmMapper.selectCount(queryWrapper);
//如果是通过【保存至算法】接口创建算法,名称重复可用随机数生成新算法名,待后续客户自主修改 if (countResult > 0) { - if (ptTrainAlgorithmCreateDTO.getNoteBookId() != null) { - String randomStr = RandomUtil.randomNumbers(MagicNumConstant.FOUR); - ptTrainAlgorithm.setAlgorithmName(ptTrainAlgorithmCreateDTO.getAlgorithmName() + randomStr); - } else { - LogUtil.error(LogEnum.BIZ_ALGORITHM, "The algorithm name ({}) already exists", ptTrainAlgorithmCreateDTO.getAlgorithmName()); - throw new BusinessException("算法名称已存在,请重新输入"); - } + LogUtil.error(LogEnum.BIZ_ALGORITHM, "The algorithm name ({}) already exists", ptTrainAlgorithmCreateDTO.getAlgorithmName()); + throw new BusinessException("算法名称已存在,请重新输入"); } //校验path是否带有压缩文件,如有,则解压至算法文件夹下并删除压缩文件 if (path.toLowerCase().endsWith(AlgorithmConstant.COMPRESS_ZIP)) { @@ -251,11 +270,21 @@ public class PtTrainAlgorithmServiceImpl implements PtTrainAlgorithmService { try { //算法未保存成功,抛出异常,并返回失败信息 ptTrainAlgorithmMapper.insert(ptTrainAlgorithm); - //设置子线程共享 - ServletRequestAttributes servletRequestAttributes = (ServletRequestAttributes) RequestContextHolder.getRequestAttributes(); - RequestContextHolder.setRequestAttributes(servletRequestAttributes, true); - //上传算法异步处理 - algorithmUpdateAsync.createTrainAlgorithm(userContext.getCurUser(), ptTrainAlgorithm, ptTrainAlgorithmCreateDTO); + if (ptTrainAlgorithmCreateDTO.getNoteBookId() != null) { + //保存算法根据notbookId更新算法id + NoteBookAlgorithmUpdateDTO noteBookAlgorithmUpdateDTO = new NoteBookAlgorithmUpdateDTO(); + noteBookAlgorithmUpdateDTO.setAlgorithmId(ptTrainAlgorithm.getId()); + noteBookAlgorithmUpdateDTO.setNotebookIdList(Collections.singletonList(ptTrainAlgorithmCreateDTO.getNoteBookId())); + noteBookClient.updateNoteBookAlgorithm(noteBookAlgorithmUpdateDTO); + ptTrainAlgorithm.setAlgorithmStatus(AlgorithmStatusEnum.SUCCESS.getCode()); + ptTrainAlgorithmMapper.updateById(ptTrainAlgorithm); + } else { + //上传算法异步处理 + //设置子线程共享 + ServletRequestAttributes servletRequestAttributes = (ServletRequestAttributes) 
RequestContextHolder.getRequestAttributes(); + RequestContextHolder.setRequestAttributes(servletRequestAttributes, true); + algorithmUpdateAsync.createTrainAlgorithm(userContext.getCurUser(), ptTrainAlgorithm, ptTrainAlgorithmCreateDTO); + } } catch (Exception e) { LogUtil.error(LogEnum.BIZ_ALGORITHM, "The user {} saving algorithm was not successful. Failure reason :{}", user.getUsername(), e.getMessage()); throw new BusinessException("算法未保存成功"); diff --git a/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml index 94dd2df..04bbfd5 100644 --- a/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml @@ -10,8 +10,8 @@ spring: nacos: config: enabled: true - namespace: dubhe-server-cloud-prod - server-addr: 127.0.0.1:8848 + namespace: dubhe-server-cloud-dev + server-addr: 10.105.1.133:8848 shared-configs[0]: data-id: common-biz.yaml group: dubhe @@ -33,5 +33,5 @@ spring: enabled: true namespace: dubhe-server-cloud-dev group: dubhe - server-addr: 127.0.0.1:8848 + server-addr: 10.105.1.133:8848 diff --git a/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml index adc8619..84d4b65 100644 --- a/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml @@ -13,8 +13,8 @@ spring: nacos: config: enabled: true - server-addr: 127.0.0.1:8848 - namespace: dubhe-server-cloud-prod + server-addr: 10.105.1.133:8848 + namespace: dubhe-server-cloud-dev shared-configs[0]: data-id: common-biz.yaml group: dubhe @@ -44,7 +44,7 @@ spring: enabled: true namespace: dubhe-server-cloud-dev group: dubhe - server-addr: 127.0.0.1:8848 + server-addr: 10.105.1.133:8848 # 配置允许后面的Bean覆盖前面名称重复的Bean main: allow-bean-definition-overriding: true \ No newline at end of file diff --git 
a/dubhe-server/dubhe-data-task/src/main/java/org/dubhe/task/data/DataTaskExecuteThread.java b/dubhe-server/dubhe-data-task/src/main/java/org/dubhe/task/data/DataTaskExecuteThread.java index c017a3e..324b8d7 100644 --- a/dubhe-server/dubhe-data-task/src/main/java/org/dubhe/task/data/DataTaskExecuteThread.java +++ b/dubhe-server/dubhe-data-task/src/main/java/org/dubhe/task/data/DataTaskExecuteThread.java @@ -559,7 +559,7 @@ public class DataTaskExecuteThread implements Runnable { for (List el : lists) { List fileIds = csvImportSaveDb(el, dataset); LogUtil.info(LogEnum.BIZ_DATASET, "table import transport to es datasetid:{}", datasetId); - fileService.transportTextToEs(dataset, fileIds); + fileService.transportTextToEs(dataset, fileIds,Boolean.FALSE); } } //------- 导入完成后 更改数据集状态 --------- diff --git a/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml index 729037d..447c484 100644 --- a/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml @@ -10,8 +10,8 @@ spring: nacos: config: enabled: true - namespace: dubhe-server-cloud-prod - server-addr: 127.0.0.1:8848 + namespace: dubhe-server-cloud-dev + server-addr: 10.105.1.133:8848 shared-configs[0]: data-id: common-biz.yaml group: dubhe @@ -38,7 +38,7 @@ spring: enabled: true namespace: dubhe-server-cloud-dev group: dubhe - server-addr: 127.0.0.1:8848 + server-addr: 10.105.1.133:8848 # 配置允许后面的Bean覆盖前面名称重复的Bean main: allow-bean-definition-overriding: true \ No newline at end of file diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/constant/ErrorEnum.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/constant/ErrorEnum.java index f0dc878..220653a 100755 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/constant/ErrorEnum.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/constant/ErrorEnum.java @@ -112,6 +112,7 @@ public enum 
ErrorEnum implements ErrorCode { DATASET_NOT_ANNOTATION(1718, "数据集暂不支持自动标注"), DATASET_NOT_OPERATIONS_BASE_DATASET(1719, "禁止操作内置的数据集"), DATASET_PUBLISH_REJECT(1720, "文本暂不支持多版本发布"), + DATASET_CHECK_VERSION_ERROR(1721,"目标版本不存在"), /** * 数据集版本校验 diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetVersionFileMapper.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetVersionFileMapper.java index 0f11182..14268da 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetVersionFileMapper.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetVersionFileMapper.java @@ -19,6 +19,7 @@ package org.dubhe.data.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; import org.apache.ibatis.annotations.*; +import org.dubhe.data.domain.bo.FileUploadBO; import org.dubhe.data.domain.dto.DatasetVersionFileDTO; import org.dubhe.data.domain.entity.DataFileAnnotation; import org.dubhe.data.domain.entity.Dataset; @@ -302,4 +303,12 @@ public interface DatasetVersionFileMapper extends BaseMapper * @return Long 版本文件id */ Long getVersionFileIdByFileName(@Param("datasetId")Long datasetId, @Param("fileName")String fileName, @Param("versionName")String versionName); + + /** + * Fetches the information required to import files. + * @param datasetId dataset id + * @param fileIds ids bound to the "fileIds" query parameter - NOTE(review): how they filter the result is defined in the mapper XML, confirm there + * @return list of FileUploadBO entries + */ + List<FileUploadBO> getFileUploadContent(@Param("datasetId")Long datasetId,@Param("fileIds")List<Long> fileIds); } \ No newline at end of file diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/FileMapper.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/FileMapper.java index a83b534..6535167 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/FileMapper.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/FileMapper.java @@ -164,7 +164,8 @@ public interface FileMapper extends BaseMapper { * @param fileIdsNotToEs 需要同步的文件ID * @return List ES数据同步DTO */ - List selectTextDataNoTransport(@Param("datasetId") Long
datasetId,@Param("fileIdsNotToEs")List fileIdsNotToEs); + List selectTextDataNoTransport(@Param("datasetId") Long datasetId,@Param("fileIdsNotToEs")List fileIdsNotToEs, + @Param("ifImport") Boolean ifImport); /** * 更新同步es标志 diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/bo/FileUploadBO.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/bo/FileUploadBO.java new file mode 100644 index 0000000..2188625 --- /dev/null +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/bo/FileUploadBO.java @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.data.domain.bo; + +import lombok.*; + +import java.io.Serializable; +/** File details returned for a file import; field meanings below are inferred from the field names - TODO confirm against the getFileUploadContent mapping. */ +@Builder +@Data +@ToString +@AllArgsConstructor +@NoArgsConstructor +public class FileUploadBO implements Serializable { + /** File URL. */ + private String fileUrl; + /** File name. */ + private String fileName; + /** File id. */ + private Long fileId; + /** Dataset-version-file id. */ + private Long versionFileId; + /** Presumably the annotation path ("ann" assumed to abbreviate "annotation") - verify. */ + private String annPath; +} diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/dto/BatchFileCreateDTO.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/dto/BatchFileCreateDTO.java index 457d1ac..a71198b 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/dto/BatchFileCreateDTO.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/dto/BatchFileCreateDTO.java @@ -43,4 +43,5 @@ public class BatchFileCreateDTO implements Serializable { @NotNull(message = "文件不能为空") private List files; + private Boolean ifImport; } diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/vo/LabelGroupQueryVO.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/vo/LabelGroupQueryVO.java index 1c53356..ee48ffb 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/vo/LabelGroupQueryVO.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/vo/LabelGroupQueryVO.java @@ -17,6 +17,7 @@ package org.dubhe.data.domain.vo; +import io.swagger.annotations.ApiModelProperty; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -83,4 +84,7 @@ public class LabelGroupQueryVO extends PageQueryBase implements Serializable { * 标签组类型:0:视觉,1:文本 */ private Integer labelGroupType; + + @ApiModelProperty("创建人用户名") + private String createUserName; } diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/FileController.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/FileController.java index 4bdedc2..4e684e5 100644 ---
a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/FileController.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/FileController.java @@ -187,6 +187,13 @@ public class FileController { return new DataResponseBody(minioUtil.getEncryptedPutUrl(bucketName, objectName, expiry)); } + @ApiOperation("MinIO生成put请求的上传路径列表") + @PostMapping(value = "/minio/getUrls") + @PreAuthorize(Permissions.DATA) + public DataResponseBody getEncryptedPutUrls(@RequestBody String objectNames) { + return new DataResponseBody(minioUtil.getEncryptedPutUrls(bucketName, objectNames, expiry)); + } + @ApiOperation("获取MinIO相关信息") @GetMapping(value = "/minio/info") public DataResponseBody getMinIOInfo() { diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/LabelGroupController.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/LabelGroupController.java index 132c35a..a899aa7 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/LabelGroupController.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/LabelGroupController.java @@ -53,8 +53,7 @@ public class LabelGroupController { @PostMapping(value = "/labelGroup") @PreAuthorize(Permissions.DATA) public DataResponseBody create(@Validated @RequestBody LabelGroupCreateDTO labelGroupCreateDTO) { - labelGroupService.creatLabelGroup(labelGroupCreateDTO); - return new DataResponseBody(); + return new DataResponseBody(labelGroupService.creatLabelGroup(labelGroupCreateDTO)); } @ApiOperation(value = "标签组分页列表") @@ -103,8 +102,7 @@ public class LabelGroupController { public DataResponseBody importLabelGroup( @RequestParam(value = "file", required = false) MultipartFile file, LabelGroupImportDTO labelGroupImportDTO) { - labelGroupService.importLabelGroup(labelGroupImportDTO, file); - return new DataResponseBody(); + return new DataResponseBody(labelGroupService.importLabelGroup(labelGroupImportDTO, file)); } diff --git 
a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionFileService.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionFileService.java index ae69a56..2460b06 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionFileService.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionFileService.java @@ -18,6 +18,7 @@ package org.dubhe.data.service; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import org.dubhe.data.domain.bo.FileUploadBO; import org.dubhe.data.domain.dto.DatasetVersionFileDTO; import org.dubhe.data.domain.entity.Dataset; import org.dubhe.data.domain.entity.DatasetVersion; @@ -342,4 +343,12 @@ public interface DatasetVersionFileService { * @param versionName 版本名称 */ Long getVersionFileIdByFileName(Long datasetId, String fileName, String versionName); + + /** + * Fetches the information required to import files. + * @param datasetId dataset id + * @param fileIds ids used to select the entries - NOTE(review): exact filtering is decided by the implementation, confirm there + * @return list of FileUploadBO entries + */ + List<FileUploadBO> getFileUploadContent(Long datasetId,List<Long> fileIds); } diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/FileService.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/FileService.java index 8c9f412..96df8fd 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/FileService.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/FileService.java @@ -315,7 +315,7 @@ public interface FileService { * @param dataset 数据集 * @param fileIdsNotToEs 需要同步的文件ID */ - void transportTextToEs(Dataset dataset,List fileIdsNotToEs); + void transportTextToEs(Dataset dataset,List fileIdsNotToEs,Boolean ifImport); /** * 还原es_transport状态 diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/LabelGroupService.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/LabelGroupService.java index ae89bc0..6c49111 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/LabelGroupService.java
+++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/LabelGroupService.java @@ -38,7 +38,7 @@ public interface LabelGroupService { * * @param labelGroupCreateDTO 创建标签组DTO */ - void creatLabelGroup(LabelGroupCreateDTO labelGroupCreateDTO); + Long creatLabelGroup(LabelGroupCreateDTO labelGroupCreateDTO); /** * 更新(编辑)标签组 @@ -94,7 +94,7 @@ public interface LabelGroupService { * @param labelGroupImportDTO 标签组导入DTO * @param file 导入文件 */ - void importLabelGroup(LabelGroupImportDTO labelGroupImportDTO, MultipartFile file); + Long importLabelGroup(LabelGroupImportDTO labelGroupImportDTO, MultipartFile file); /** * 标签组复制 diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java index ae0cd09..89b1e9c 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java @@ -22,12 +22,15 @@ import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import org.dubhe.biz.base.constant.*; +import org.dubhe.biz.base.dto.UserDTO; +import org.dubhe.biz.base.dto.UserSmallDTO; import org.dubhe.biz.base.vo.DatasetVO; import org.dubhe.biz.file.utils.MinioUtil; import org.dubhe.biz.file.api.FileStoreApi; @@ -49,6 +52,7 @@ import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.biz.permission.annotation.RolePermission; 
import org.dubhe.biz.statemachine.dto.StateChangeDTO; +import org.dubhe.cloud.authconfig.service.AdminClient; import org.dubhe.cloud.authconfig.utils.JwtUtils; import org.dubhe.biz.base.vo.DatasetVO; import org.dubhe.data.client.TrainServerClient; @@ -56,6 +60,7 @@ import org.dubhe.data.constant.*; import org.dubhe.data.dao.DatasetMapper; import org.dubhe.data.dao.TaskMapper; import org.dubhe.biz.base.vo.ProgressVO; +import org.dubhe.data.domain.bo.FileUploadBO; import org.dubhe.data.domain.dto.*; import org.dubhe.data.domain.entity.*; import org.dubhe.data.domain.vo.*; @@ -67,6 +72,7 @@ import org.dubhe.data.machine.utils.StateMachineUtil; import org.dubhe.data.pool.BasePool; import org.dubhe.data.service.*; import org.dubhe.data.service.task.DatasetRecycleFile; +import org.dubhe.data.util.GeneratorKeyUtil; import org.dubhe.data.util.ZipUtil; import org.dubhe.recycle.domain.dto.RecycleCreateDTO; import org.dubhe.recycle.domain.dto.RecycleDetailCreateDTO; @@ -113,6 +119,9 @@ public class DatasetServiceImpl extends ServiceImpl impl @Lazy private TaskService taskService; + @Resource + private AdminClient adminClient; + @Resource(name = "hostFileStoreApiImpl") private FileStoreApi fileStoreApi; @@ -260,6 +269,9 @@ public class DatasetServiceImpl extends ServiceImpl impl @Resource private MinioUtil minioUtil; + @Autowired + private GeneratorKeyUtil generatorKeyUtil; + /** * 线程池 */ @@ -551,6 +563,7 @@ public class DatasetServiceImpl extends ServiceImpl impl datasetVO.setLabelGroupType(labelGroupType); datasetVO.setSourceId(dataset.getSourceId()); datasetVO.setCurrentVersionName(dataset.getCurrentVersionName()); + return datasetVO; } @@ -603,25 +616,23 @@ public class DatasetServiceImpl extends ServiceImpl impl } catch (DuplicateKeyException e) { throw new BusinessException(ErrorEnum.DATASET_NAME_DUPLICATED_ERROR); } - if (!dataset.isImport()) { - //新增数据标签关系 - List