
parampack.py 6.3 kB

# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
from typing import Callable, Iterable, Optional, Tuple

import numpy as np

from .._internal.opr import param_pack_split
from ..core import Parameter, Tensor
from .module import Module


class ParamPack(Module):
    r"""Pack a module's parameters by gathering their memory into contiguous address space.
    Parameters with the same (device, dtype, requires_grad) key, for example
    ('gpu0', float32, True), are packed together. This speeds up multi-machine
    training considerably by reducing the cost of allreducing gradients.

    :param model: the module whose parameters will be packed.
    :param nr_ignore_first: number of leading parameters that are left unpacked.
    :param max_size_per_group: upper bound of each packed group's size, in MB.
    :param max_nr_params_per_group: upper bound of the number of parameters in each group.
    :param group_func: callable mapping ``(name, param)`` to a group key; parameters
        with different keys are never packed together.
    """

    def __init__(
        self,
        model: Module,
        nr_ignore_first: int = 8,
        max_size_per_group: int = 10,
        max_nr_params_per_group: int = 100,
        group_func: Callable = lambda name, param: 0,
    ):
        super().__init__()
        self._model = model
        self._nr_ignore_first = nr_ignore_first
        self._max_size_per_group = max_size_per_group
        self._max_nr_params_per_group = max_nr_params_per_group
        self._group_func = group_func
        self._grouped_params = []
        self._packed_params = []

        params = model.named_parameters()
        self._pack_params(params)

    def parameters(self, requires_grad: Optional[bool] = None) -> Iterable[Parameter]:
        for param in self._packed_params:
            if requires_grad is None or param.requires_grad == requires_grad:
                yield param

    def named_parameters(
        self, requires_grad: Optional[bool] = None
    ) -> Iterable[Tuple[str, Parameter]]:
        for idx, param in enumerate(self._packed_params):
            if requires_grad is None or param.requires_grad == requires_grad:
                yield "packed_param_" + str(idx), param

    def _pack_params(self, params: Iterable[Tuple[str, Parameter]]):
        groups = collections.defaultdict(list)
        ignored = 0
        param_id = 0
        for name, param in params:
            if self._nr_ignore_first > ignored:
                # The first ``nr_ignore_first`` parameters stay unpacked.
                ignored += 1
                self._grouped_params.append([{"shape": param.shape, "id": param_id}])
                param.pack_group_key = self._group_func(name, param)
                self._packed_params.append(param)
            else:
                key = (
                    param.dtype,
                    param.device,
                    param.requires_grad,
                    self._group_func(name, param),
                )
                groups[key].append({"tensor": param, "id": param_id})
            param_id += 1
        for (dtype, device, requires_grad, group_key) in groups.keys():
            dtype_sz = np.dtype(dtype).itemsize
            # Convert the device's byte alignment into an element count.
            align = device.mem_align
            if align < dtype_sz:
                align = 1
            else:
                assert align % dtype_sz == 0
                align //= dtype_sz

            group = groups[(dtype, device, requires_grad, group_key)]
            while group:
                aligned_pos = []
                offset = 0
                params = []
                idx = 0
                while idx < len(group):
                    param = group[idx]
                    assert param["tensor"].device == device
                    # Pad the offset up to the next alignment boundary
                    # (assumes ``align`` is a power of two).
                    padding = (align - (offset & (align - 1))) & (align - 1)
                    offset += padding
                    aligned_pos.append(offset)
                    params.append(param)
                    offset += int(np.prod(param["tensor"].shape))
                    idx += 1
                    if (
                        offset * dtype_sz >= self._max_size_per_group * 1024 * 1024
                        or idx >= self._max_nr_params_per_group
                    ):
                        break
                group = group[idx:]
                if idx == 1:
                    # Groups that ended up with a single parameter stay unpacked.
                    params[0]["tensor"].pack_group_key = group_key
                    self._packed_params.append(params[0]["tensor"])
                    self._grouped_params.append(
                        [{"shape": params[0]["tensor"].shape, "id": params[0]["id"]}]
                    )
                    continue
                packed_value = np.zeros((offset,), dtype=dtype)
                for param, pos in zip(params, aligned_pos):
                    val = param["tensor"].numpy()
                    packed_value[pos : pos + val.size] = val.flatten()
                new_param = Parameter(
                    value=packed_value,
                    device=device,
                    dtype=dtype,
                    requires_grad=requires_grad,
                )
                new_param.pack_group_key = group_key
                self._packed_params.append(new_param)
                self._grouped_params.append(
                    [{"shape": i["tensor"].shape, "id": i["id"]} for i in params]
                )

    def forward(self, *args, **kwargs):
        replace_param = dict()
        for i in range(len(self._packed_params)):
            packed_param = self._packed_params[i]
            grouped_params = self._grouped_params[i]
            if len(grouped_params) == 1:
                continue
            # Split the packed tensor back into tensors with the original shapes.
            split = param_pack_split(
                packed_param._symvar, [i["shape"] for i in grouped_params]
            )
            split = [
                Parameter(Tensor(i, requires_grad=packed_param.requires_grad))
                for i in split
            ]
            for j in range(len(split)):
                replace_param[grouped_params[j]["id"]] = split[j]
        self._model.replace_param(replace_param, 0)
        return self._model.forward(*args, **kwargs)
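
A detail worth calling out in _pack_params: each parameter's start offset is padded up to the device's memory-alignment boundary with a bit-mask trick, which is only valid when the per-element alignment is a power of two (the usual case, since mem_align and dtype sizes are normally powers of two). A tiny standalone illustration of that arithmetic, with made-up numbers:

# Offset-padding arithmetic used inside _pack_params (illustrative values).
align = 64     # e.g. 256-byte device alignment / 4-byte float32 elements
offset = 100   # elements already written into the packed buffer

# Distance to the next multiple of `align`; zero when already aligned.
padding = (align - (offset & (align - 1))) & (align - 1)
assert padding == 28
assert (offset + padding) % align == 0   # the next parameter starts at 128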

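To show how the class is meant to be used, here is a minimal, hedged usage sketch. It is not part of parampack.py: TinyNet, the import paths, and the optimizer call follow the usual MegEngine-style API and are assumptions for illustration, not something this file defines.

import megengine.module as M
import megengine.optimizer as optim
from megengine.module import ParamPack   # assumed export path for the class above


class TinyNet(M.Module):
    """Placeholder model used only for this sketch."""

    def __init__(self):
        super().__init__()
        self.fc1 = M.Linear(32, 64)
        self.fc2 = M.Linear(64, 10)

    def forward(self, x):
        return self.fc2(self.fc1(x))


model = TinyNet()

# Wrap the model: parameters sharing (device, dtype, requires_grad, group_key)
# are concatenated into a few large packed tensors.
packed = ParamPack(
    model,
    nr_ignore_first=0,            # pack everything, keep nothing unpacked
    max_size_per_group=10,        # cap each packed tensor at ~10 MB
    max_nr_params_per_group=100,
)

# Hand the packed tensors (not the raw ones) to the optimizer, so gradient
# allreduce in distributed training touches a few large buffers instead of
# many small ones.
opt = optim.SGD(packed.parameters(), lr=0.01)

# Calling the wrapper runs its forward(): the packed tensors are split back
# into the original shapes and substituted into the wrapped model before the
# wrapped model's own forward() is executed.
# out = packed(input_batch)      # input_batch is a placeholder tensor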