You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

_queue.py 5.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. import binascii
  10. import os
  11. import queue
  12. import subprocess
  13. from multiprocessing import Queue
  14. import pyarrow
  15. import pyarrow.plasma as plasma
  16. MGE_PLASMA_MEMORY = int(os.environ.get("MGE_PLASMA_MEMORY", 4000000000)) # 4GB
  17. # Each process only need to start one plasma store, so we set it as a global variable.
  18. # TODO: how to share between different processes?
  19. MGE_PLASMA_STORE_MANAGER = None
  20. def _clear_plasma_store():
  21. # `_PlasmaStoreManager.__del__` will not be called automaticly in subprocess,
  22. # so this function should be called explicitly
  23. global MGE_PLASMA_STORE_MANAGER
  24. if MGE_PLASMA_STORE_MANAGER is not None and MGE_PLASMA_STORE_MANAGER.refcount == 0:
  25. del MGE_PLASMA_STORE_MANAGER
  26. MGE_PLASMA_STORE_MANAGER = None
  27. class _PlasmaStoreManager:
  28. __initialized = False
  29. def __init__(self):
  30. self.socket_name = "/tmp/mge_plasma_{}".format(
  31. binascii.hexlify(os.urandom(8)).decode()
  32. )
  33. debug_flag = bool(os.environ.get("MGE_DATALOADER_PLASMA_DEBUG", 0))
  34. # NOTE: this is a hack. Directly use `plasma_store` may make subprocess
  35. # difficult to handle the exception happened in `plasma-store-server`.
  36. # For `plasma_store` is just a wrapper of `plasma-store-server`, which use
  37. # `os.execv` to call the executable `plasma-store-server`.
  38. cmd_path = os.path.join(pyarrow.__path__[0], "plasma-store-server")
  39. self.plasma_store = subprocess.Popen(
  40. [cmd_path, "-s", self.socket_name, "-m", str(MGE_PLASMA_MEMORY),],
  41. stdout=None if debug_flag else subprocess.DEVNULL,
  42. stderr=None if debug_flag else subprocess.DEVNULL,
  43. )
  44. self.__initialized = True
  45. self.refcount = 1
  46. def __del__(self):
  47. if self.__initialized and self.plasma_store.returncode is None:
  48. self.plasma_store.kill()
  49. class PlasmaShmQueue:
  50. def __init__(self, maxsize: int = 0):
  51. r"""Use pyarrow in-memory plasma store to implement shared memory queue.
  52. Compared to native `multiprocess.Queue`, `PlasmaShmQueue` avoid pickle/unpickle
  53. and communication overhead, leading to better performance in multi-process
  54. application.
  55. :type maxsize: int
  56. :param maxsize: maximum size of the queue, `None` means no limit. (default: ``None``)
  57. """
  58. # Lazy start the plasma store manager
  59. global MGE_PLASMA_STORE_MANAGER
  60. if MGE_PLASMA_STORE_MANAGER is None:
  61. try:
  62. MGE_PLASMA_STORE_MANAGER = _PlasmaStoreManager()
  63. except Exception as e:
  64. err_info = (
  65. "Please make sure pyarrow installed correctly!\n"
  66. "You can try reinstall pyarrow and see if you can run "
  67. "`plasma_store -s /tmp/mge_plasma_xxx -m 1000` normally."
  68. )
  69. raise RuntimeError(
  70. "Exception happened in starting plasma_store: {}\n"
  71. "Tips: {}".format(str(e), err_info)
  72. )
  73. else:
  74. MGE_PLASMA_STORE_MANAGER.refcount += 1
  75. self.socket_name = MGE_PLASMA_STORE_MANAGER.socket_name
  76. # TODO: how to catch the exception happened in `plasma.connect`?
  77. self.client = None
  78. # Used to store the header for the data.(ObjectIDs)
  79. self.queue = Queue(maxsize) # type: Queue
  80. def put(self, data, block=True, timeout=None):
  81. if self.client is None:
  82. self.client = plasma.connect(self.socket_name)
  83. try:
  84. object_id = self.client.put(data)
  85. except plasma.PlasmaStoreFull:
  86. raise RuntimeError("plasma store out of memory!")
  87. try:
  88. self.queue.put(object_id, block, timeout)
  89. except queue.Full:
  90. self.client.delete([object_id])
  91. raise queue.Full
  92. def get(self, block=True, timeout=None):
  93. if self.client is None:
  94. self.client = plasma.connect(self.socket_name)
  95. object_id = self.queue.get(block, timeout)
  96. if not self.client.contains(object_id):
  97. raise RuntimeError(
  98. "ObjectID: {} not found in plasma store".format(object_id)
  99. )
  100. data = self.client.get(object_id)
  101. self.client.delete([object_id])
  102. return data
  103. def qsize(self):
  104. return self.queue.qsize()
  105. def empty(self):
  106. return self.queue.empty()
  107. def join(self):
  108. self.queue.join()
  109. def disconnect_client(self):
  110. if self.client is not None:
  111. self.client.disconnect()
  112. def close(self):
  113. self.queue.close()
  114. self.disconnect_client()
  115. global MGE_PLASMA_STORE_MANAGER
  116. MGE_PLASMA_STORE_MANAGER.refcount -= 1
  117. _clear_plasma_store()
  118. def cancel_join_thread(self):
  119. self.queue.cancel_join_thread()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台