You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

helper.py 2.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. from typing import Optional, Union
  10. import megengine._internal as mgb
  11. from megengine._internal.opr_param_defs import CollectiveComm as CollParam
  12. from .util import (
  13. get_backend,
  14. get_group_id,
  15. get_master_ip,
  16. get_master_port,
  17. get_rank,
  18. get_world_size,
  19. )
  20. def collective_comm_symvar(
  21. inp: Union[mgb.SymbolVar, mgb.CompGraph],
  22. key: Optional[str] = None,
  23. op: CollParam.Mode = None,
  24. nr_ranks: Optional[int] = None,
  25. is_root: Optional[bool] = None,
  26. rank: Optional[int] = None,
  27. local_grad: Optional[bool] = False,
  28. dtype: Optional[type] = None,
  29. device: Optional[mgb.CompNode] = None,
  30. comp_graph: Optional[mgb.CompGraph] = None,
  31. ) -> mgb.SymbolVar:
  32. """Helper function for creating collective_comm operators
  33. :param inp: tensor or comp_graph
  34. :param key: unique identifier for collective communication
  35. :param op: mode of collective communication
  36. :param nr_ranks: number of ranks, use util.get_world_size() as default
  37. :param is_root: whether this node is root node
  38. :param rank: rank of this node
  39. :param local_grad: whether use local grad
  40. :param dtype: output data type, use dtype of inp as default
  41. :param device: output comp node, use comp node of inp as default
  42. :param comp_graph: output comp graph, use comp graph of inp as default
  43. """
  44. return mgb.opr.collective_comm(
  45. inp,
  46. key=key if key is not None else ("collective_comm_" + str(get_group_id())),
  47. nr_devices=nr_ranks if nr_ranks is not None else get_world_size(),
  48. is_root=is_root if is_root is not None else (get_rank() == 0),
  49. rank=rank if rank is not None else get_rank(),
  50. local_grad=local_grad,
  51. server_addr=get_master_ip(),
  52. port=get_master_port(),
  53. param=CollParam(mode=op),
  54. dtype=dtype,
  55. backend=get_backend(),
  56. comp_node=device,
  57. comp_graph=comp_graph,
  58. )

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台