You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

parampack.py 5.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. import collections
  10. from typing import Iterable, Optional
  11. import numpy as np
  12. from .._internal.opr import param_pack_split
  13. from ..core import Parameter, Tensor
  14. from .module import Module
  15. class ParamPack(Module):
  16. r"""Pack module's parameters by gathering their memory to continuous address.
  17. Using (device, dtype, requires_grad) as key, for example ('gpu0', float32, True),
  18. parameters with same key will be packed togather.
  19. It helps a lot for multimachine training by speeding up allreduce gradients.
  20. :param model: the module you want to pack parameters.
  21. :param nr_ignore_first: how many parameters will be unpacked at first.
  22. :param max_size_per_group: upper bound of packed parameters' size in MB.
  23. :param max_nr_params_per_group: upper bound of the number of parameters of each group.
  24. """
  25. def __init__(
  26. self,
  27. model: Module,
  28. nr_ignore_first: int = 8,
  29. max_size_per_group: int = 10,
  30. max_nr_params_per_group: int = 100,
  31. ):
  32. super().__init__()
  33. self._model = model
  34. self._nr_ignore_first = nr_ignore_first
  35. self._max_size_per_group = max_size_per_group
  36. self._max_nr_params_per_group = max_nr_params_per_group
  37. self._grouped_params = []
  38. self._packed_params = []
  39. params = model.parameters()
  40. self._pack_params(params)
  41. def parameters(self, requires_grad: Optional[bool] = None) -> Iterable[Parameter]:
  42. for param in self._packed_params:
  43. if requires_grad is None or param.requires_grad == requires_grad:
  44. yield param
  45. def _pack_params(self, params: Iterable[Parameter]):
  46. groups = collections.defaultdict(list)
  47. ignored = 0
  48. param_id = 0
  49. for param in params:
  50. if self._nr_ignore_first > ignored:
  51. ignored += 1
  52. self._grouped_params.append([{"shape": param.shape, "id": param_id}])
  53. self._packed_params.append(param)
  54. else:
  55. key = (param.dtype, param.device, param.requires_grad)
  56. groups[key].append({"tensor": param, "id": param_id})
  57. param_id += 1
  58. for (dtype, device, requires_grad) in groups.keys():
  59. dtype_sz = np.dtype(dtype).itemsize
  60. align = device.mem_align
  61. if align < dtype_sz:
  62. align = 1
  63. else:
  64. assert align % dtype_sz == 0
  65. align //= dtype_sz
  66. group = groups[(dtype, device, requires_grad)]
  67. while group:
  68. aligned_pos = []
  69. offset = 0
  70. params = []
  71. idx = 0
  72. while idx < len(group):
  73. param = group[idx]
  74. assert param["tensor"].device == device
  75. padding = (align - (offset & (align - 1))) & (align - 1)
  76. offset += padding
  77. aligned_pos.append(offset)
  78. params.append(param)
  79. offset += int(np.prod(param["tensor"].shape))
  80. idx += 1
  81. if (
  82. offset * dtype_sz >= self._max_size_per_group * 1024 * 1024
  83. or idx >= self._max_nr_params_per_group
  84. ):
  85. break
  86. group = group[idx:]
  87. if idx == 1:
  88. # ignore param packs with only one item
  89. self._packed_params.append(params[0]["tensor"])
  90. self._grouped_params.append(
  91. [{"shape": params[0]["tensor"].shape, "id": params[0]["id"]}]
  92. )
  93. continue
  94. packed_value = np.zeros((offset,), dtype=dtype)
  95. for param, pos in zip(params, aligned_pos):
  96. val = param["tensor"].numpy()
  97. packed_value[pos : pos + val.size] = val.flatten()
  98. new_param = Parameter(
  99. value=packed_value,
  100. device=device,
  101. dtype=dtype,
  102. requires_grad=requires_grad,
  103. )
  104. self._packed_params.append(new_param)
  105. self._grouped_params.append(
  106. [{"shape": i["tensor"].shape, "id": i["id"]} for i in params]
  107. )
  108. def forward(self, *args, **kwargs):
  109. replace_param = dict()
  110. for i in range(len(self._packed_params)):
  111. packed_param = self._packed_params[i]
  112. grouped_params = self._grouped_params[i]
  113. if len(grouped_params) == 1:
  114. continue
  115. split = param_pack_split(
  116. packed_param._symvar, [i["shape"] for i in grouped_params]
  117. )
  118. split = [
  119. Parameter(Tensor(i, requires_grad=packed_param.requires_grad))
  120. for i in split
  121. ]
  122. for j in range(len(split)):
  123. replace_param[grouped_params[j]["id"]] = split[j]
  124. self._model.replace_param(replace_param, 0)
  125. return self._model.forward(*args, **kwargs)

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台