
tensor.py 16 kB

# -*- coding: utf-8 -*-
# This file is part of MegEngine, a deep learning framework developed by
# Megvii.
#
# Copyright (c) 2020-2021 Megvii Inc. All rights reserved.

from ctypes import *

import numpy as np

from .base import _Ctensor, _lib, _LiteCObjBase
from .struct import LiteDataType, LiteDeviceType, LiteIOType, Structure

MAX_DIM = 7

_lite_type_to_nptypes = {
    LiteDataType.LITE_INT: np.int32,
    LiteDataType.LITE_FLOAT: np.float32,
    LiteDataType.LITE_UINT8: np.uint8,
    LiteDataType.LITE_INT8: np.int8,
    LiteDataType.LITE_INT16: np.int16,
    LiteDataType.LITE_HALF: np.float16,
}

_nptype_to_lite_type = {val: key for key, val in _lite_type_to_nptypes.items()}

_str_nptypes_to_lite_nptypes = {
    np.dtype("int32"): LiteDataType.LITE_INT,
    np.dtype("float32"): LiteDataType.LITE_FLOAT,
    np.dtype("uint8"): LiteDataType.LITE_UINT8,
    np.dtype("int8"): LiteDataType.LITE_INT8,
    np.dtype("int16"): LiteDataType.LITE_INT16,
    np.dtype("float16"): LiteDataType.LITE_HALF,
}

ctype_to_lite_dtypes = {
    c_int: LiteDataType.LITE_INT,
    c_uint: LiteDataType.LITE_INT,
    c_float: LiteDataType.LITE_FLOAT,
    c_ubyte: LiteDataType.LITE_UINT8,
    c_byte: LiteDataType.LITE_INT8,
    c_short: LiteDataType.LITE_INT16,
    c_ushort: LiteDataType.LITE_INT16,
}

class LiteLayout(Structure):
    """
    the simple layout description
    """

    _fields_ = [
        ("shapes", c_size_t * MAX_DIM),
        ("ndim", c_size_t),
        ("data_type", c_int),
    ]

    def __init__(self, shape=None, dtype=None):
        if shape:
            shape = list(shape)
            assert len(shape) <= MAX_DIM, "Layout max dim is 7."
            self.shapes = (c_size_t * MAX_DIM)(*shape)
            self.ndim = len(shape)
        else:
            self.shapes = (c_size_t * MAX_DIM)()
            self.ndim = 0
        if not dtype:
            self.data_type = LiteDataType.LITE_FLOAT
        elif isinstance(dtype, LiteDataType):
            self.data_type = dtype
        elif type(dtype) == str:
            self.data_type = _str_nptypes_to_lite_nptypes[np.dtype(dtype)]
        elif isinstance(dtype, np.dtype):
            ctype = np.ctypeslib.as_ctypes_type(dtype)
            self.data_type = ctype_to_lite_dtypes[ctype]
        elif isinstance(dtype, type):
            self.data_type = _nptype_to_lite_type[dtype]
        else:
            raise RuntimeError("unknown data type")

    def __repr__(self):
        data = {
            "shapes": list(self.shapes),
            "ndim": self.ndim,
            "data_type": _lite_type_to_nptypes[LiteDataType(self.data_type)],
        }
        return data.__repr__()
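
# A minimal usage sketch (illustrative, not part of the original module):
# the dtype forms accepted by LiteLayout.__init__ above. Shapes here are
# hypothetical. Note the np.dtype branch goes through ctypes, so only dtypes
# with a ctypes equivalent apply there.
#
#   LiteLayout((1, 3, 224, 224))                  # no dtype -> LITE_FLOAT
#   LiteLayout([1, 3], dtype="int8")              # str -> LITE_INT8
#   LiteLayout([1, 3], dtype=np.dtype("int16"))   # np.dtype -> LITE_INT16
#   LiteLayout([1, 3], dtype=np.int32)            # numpy scalar type -> LITE_INT
#   LiteLayout([1, 3], dtype=LiteDataType.LITE_UINT8)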

class _LiteTensorDesc(Structure):
    """
    wrapper of the MegEngine Tensor

    :is_pinned_host: when set, the storage memory of the tensor is pinned
        memory; this is used to optimize the H2D or D2H memory copy. If the
        device or layout is not set, this tensor will automatically be set
        to a pinned tensor when copying from another device (CUDA) tensor.
    """

    _fields_ = [
        ("is_pinned_host", c_int),
        ("layout", LiteLayout),
        ("device_type", c_int),
        ("device_id", c_int),
    ]

    def __init__(self):
        self.layout = LiteLayout()
        self.device_type = LiteDeviceType.LITE_CPU
        self.is_pinned_host = False
        self.device_id = 0

    def __repr__(self):
        # ctypes exposes simple fields as plain Python ints, so convert
        # them directly rather than through .value
        data = {
            "is_pinned_host": bool(self.is_pinned_host),
            "layout": self.layout,
            "device_type": LiteDeviceType(self.device_type),
            "device_id": self.device_id,
        }
        return data.__repr__()

class _TensorAPI(_LiteCObjBase):
    """
    get the API from the lib
    """

    _api_ = [
        ("LITE_make_tensor", [_LiteTensorDesc, POINTER(_Ctensor)]),
        ("LITE_set_tensor_layout", [_Ctensor, LiteLayout]),
        ("LITE_reset_tensor_memory", [_Ctensor, c_void_p, c_size_t]),
        ("LITE_reset_tensor", [_Ctensor, LiteLayout, c_void_p]),
        ("LITE_tensor_reshape", [_Ctensor, POINTER(c_int), c_int]),
        (
            "LITE_tensor_slice",
            [
                _Ctensor,
                POINTER(c_size_t),
                POINTER(c_size_t),
                POINTER(c_size_t),
                c_size_t,
                POINTER(_Ctensor),
            ],
        ),
        (
            "LITE_tensor_concat",
            [POINTER(_Ctensor), c_int, c_int, c_int, c_int, POINTER(_Ctensor)],
        ),
        ("LITE_tensor_fill_zero", [_Ctensor]),
        ("LITE_tensor_copy", [_Ctensor, _Ctensor]),
        ("LITE_tensor_share_memory_with", [_Ctensor, _Ctensor]),
        ("LITE_get_tensor_memory", [_Ctensor, POINTER(c_void_p)]),
        ("LITE_get_tensor_total_size_in_byte", [_Ctensor, POINTER(c_size_t)]),
        ("LITE_get_tensor_layout", [_Ctensor, POINTER(LiteLayout)]),
        ("LITE_get_tensor_device_type", [_Ctensor, POINTER(c_int)]),
        ("LITE_get_tensor_device_id", [_Ctensor, POINTER(c_int)]),
        ("LITE_destroy_tensor", [_Ctensor]),
        ("LITE_is_pinned_host", [_Ctensor, POINTER(c_int)]),
        # registered here so the is_continue property below can call it
        ("LITE_is_memory_continue", [_Ctensor, POINTER(c_int)]),
    ]

class LiteTensor(object):
    """
    the tensor to hold a block of data
    """

    _api = _TensorAPI()._lib

    def __init__(
        self,
        layout=None,
        device_type=LiteDeviceType.LITE_CPU,
        device_id=0,
        is_pinned_host=False,
    ):
        """
        create a Tensor with the given layout, device and is_pinned_host
        parameters
        """
        self._tensor = _Ctensor()
        if layout:
            self._layout = layout
        else:
            self._layout = LiteLayout()
        self._device_type = device_type
        self._device_id = device_id
        self._is_pinned_host = is_pinned_host

        tensor_desc = _LiteTensorDesc()
        tensor_desc.layout = self._layout
        tensor_desc.device_type = device_type
        tensor_desc.device_id = device_id
        tensor_desc.is_pinned_host = is_pinned_host
        self._api.LITE_make_tensor(tensor_desc, byref(self._tensor))

    def __del__(self):
        self._api.LITE_destroy_tensor(self._tensor)

    def fill_zero(self):
        """
        fill the buffer memory with zero
        """
        self._api.LITE_tensor_fill_zero(self._tensor)
        self.update()

    def share_memory_with(self, src_tensor):
        """
        share the same memory with src_tensor; the original memory of this
        tensor will be freed
        """
        assert isinstance(src_tensor, LiteTensor)
        self._api.LITE_tensor_share_memory_with(self._tensor, src_tensor._tensor)
        self.update()

    @property
    def layout(self):
        self._api.LITE_get_tensor_layout(self._tensor, byref(self._layout))
        return self._layout

    @layout.setter
    def layout(self, layout):
        assert isinstance(layout, LiteLayout)
        self._layout = layout
        self._api.LITE_set_tensor_layout(self._tensor, layout)

    @property
    def is_pinned_host(self):
        """
        whether the tensor is a pinned tensor
        """
        pinned = c_int()
        self._api.LITE_is_pinned_host(self._tensor, byref(pinned))
        self._is_pinned_host = pinned.value
        return bool(self._is_pinned_host)

    @property
    def device_type(self):
        """
        get the device type of the tensor
        """
        device_type = c_int()
        self._api.LITE_get_tensor_device_type(self._tensor, byref(device_type))
        self._device_type = LiteDeviceType(device_type.value)
        return self._device_type

    @property
    def device_id(self):
        """
        get the device id of the tensor
        """
        device_id = c_int()
        self._api.LITE_get_tensor_device_id(self._tensor, byref(device_id))
        self._device_id = device_id.value
        return device_id.value

    @property
    def is_continue(self):
        """
        whether the tensor memory is continuous
        """
        is_continue = c_int()
        self._api.LITE_is_memory_continue(self._tensor, byref(is_continue))
        return bool(is_continue.value)

    @property
    def nbytes(self):
        """
        get the length of the memory in bytes
        """
        self.update()
        length = c_size_t()
        self._api.LITE_get_tensor_total_size_in_byte(self._tensor, byref(length))
        return length.value

    def update(self):
        """
        update the members from C; this is called automatically after slice
        and share operations
        """
        pinned = c_int()
        self._api.LITE_is_pinned_host(self._tensor, byref(pinned))
        self._is_pinned_host = pinned.value
        device_type = c_int()
        self._api.LITE_get_tensor_device_type(self._tensor, byref(device_type))
        self._device_type = LiteDeviceType(device_type.value)
        self._api.LITE_get_tensor_layout(self._tensor, byref(self._layout))

    def copy_from(self, src_tensor):
        """
        copy memory from the src_tensor
        """
        assert isinstance(src_tensor, LiteTensor)
        self._api.LITE_tensor_copy(self._tensor, src_tensor._tensor)
        self.update()

    def reshape(self, shape):
        """
        reshape the tensor without changing the data, only the shape

        :param shape: int array of the target shape
        """
        shape = list(shape)
        length = len(shape)
        c_shape = (c_int * length)(*shape)
        self._api.LITE_tensor_reshape(self._tensor, c_shape, length)
        self.update()

    def slice(self, start, end, step=None):
        """
        slice the tensor with the given start, end and step

        :param start: slice begin index of each dim
        :param end: slice end index of each dim
        :param step: slice step of each dim
        """
        start = list(start)
        end = list(end)
        length = len(start)
        assert length == len(end), "slice with different length of start and end."
        if step:
            assert length == len(step), "slice with different length of start and step."
            step = list(step)
        else:
            step = [1 for i in range(length)]
        c_start = (c_size_t * length)(*start)
        c_end = (c_size_t * length)(*end)
        c_step = (c_size_t * length)(*step)
        slice_tensor = LiteTensor()
        self._api.LITE_tensor_slice(
            self._tensor, c_start, c_end, c_step, length, byref(slice_tensor._tensor)
        )
        slice_tensor.update()
        return slice_tensor

    def get_ctypes_memory(self):
        """
        get the memory of the tensor; returns a c_void_p pointing to the
        tensor memory
        """
        self.update()
        mem = c_void_p()
        self._api.LITE_get_tensor_memory(self._tensor, byref(mem))
        return mem

    def set_data_by_share(self, data, length=0, layout=None):
        """
        share the data to the tensor

        :param data: the data to be shared to the tensor; it should be a
            numpy.ndarray or ctypes data
        """
        self.update()
        if isinstance(data, np.ndarray):
            assert (
                self.is_continue
            ), "set_data_by_share can only apply to a continuous tensor."
            assert (
                self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
            ), "set_data_by_share can only apply to a CPU tensor or pinned tensor."
            np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
            c_type = np.ctypeslib.as_ctypes_type(np_type)
            if self.nbytes != data.nbytes:
                self.layout = LiteLayout(data.shape, ctype_to_lite_dtypes[c_type])
            # keep a reference so the shared numpy buffer is not freed
            self._shared_data = data
            data = data.ctypes.data_as(POINTER(c_type))
        if layout is not None:
            self.layout = layout
        else:
            assert length == 0 or length == self.nbytes, "the data length does not match."
        self._api.LITE_reset_tensor_memory(self._tensor, data, self.nbytes)

    def set_data_by_copy(self, data, data_length=0, layout=None):
        """
        copy the data to the tensor

        :param data: the data to copy to the tensor; it should be a list,
            a numpy.ndarray or ctypes data with length
        """
        self.update()
        if layout is not None:
            self.layout = layout
        assert self.is_continue, "set_data_by_copy can only apply to a continuous tensor."
        assert (
            self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
        ), "set_data_by_copy can only apply to a CPU tensor or pinned tensor."
        np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
        c_type = np.ctypeslib.as_ctypes_type(np_type)
        tensor_memory = c_void_p()
        if type(data) == list:
            length = len(data)
            self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
            tensor_length = self.nbytes
            assert (
                length * sizeof(c_type) <= tensor_length
            ), "the length of input data to set to the tensor is too large."
            arr = (c_type * length)(*data)
            memmove(tensor_memory, arr, sizeof(c_type) * length)
        elif type(data) == np.ndarray:
            if self.nbytes != data.nbytes:
                self.layout = LiteLayout(data.shape, data.dtype)
            arr = data.ctypes.data_as(POINTER(c_type))
            self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
            assert self.nbytes == data.nbytes
            memmove(tensor_memory, arr, self.nbytes)
        else:
            assert (
                data_length == self.nbytes or layout is not None
            ), "when input data is ctypes, the data length or the layout must be set"
            self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
            memmove(tensor_memory, data, data_length)

    def to_numpy(self):
        """
        get the buffer of the tensor as a numpy.ndarray
        """
        self.update()
        if self.nbytes <= 0:
            return np.array([])
        if self.is_continue and (
            self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
        ):
            ptr = c_void_p()
            self._api.LITE_get_tensor_memory(self._tensor, byref(ptr))
            np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
            shape = [self._layout.shapes[i] for i in range(self._layout.ndim)]
            np_arr = np.zeros(shape, np_type)
            if np_arr.nbytes:
                memmove(np_arr.ctypes.data_as(c_void_p), ptr, np_arr.nbytes)
            return np_arr
        else:
            # not a continuous CPU/pinned tensor: copy through a temporary
            # host tensor first
            tmp_tensor = LiteTensor(self.layout)
            tmp_tensor.copy_from(self)
            return tmp_tensor.to_numpy()

    def __repr__(self):
        self.update()
        data = {
            "layout": self._layout,
            "device_type": self._device_type,
            "device_id": int(self.device_id),
            "is_pinned_host": bool(self._is_pinned_host),
        }
        return data.__repr__()
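
# A minimal usage sketch (illustrative, not part of the original module;
# values are hypothetical): a host-side round trip through set_data_by_copy /
# to_numpy, plus a slice, assuming begin/end/step slicing semantics with an
# exclusive end.
#
#   t = LiteTensor(LiteLayout((2, 4), dtype="float32"))
#   t.set_data_by_copy(np.arange(8, dtype=np.float32).reshape(2, 4))
#   sub = t.slice([0, 0], [2, 2])    # rows 0..1, cols 0..1
#   print(sub.to_numpy())            # expected: [[0. 1.] [4. 5.]]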

def LiteTensorConcat(
    tensors, dim, device_type=LiteDeviceType.LITE_DEVICE_DEFAULT, device_id=-1
):
    """
    concat the input tensors along the given dim into one tensor

    :param dim: the dim along which to concat
    :param device_type: the device type of the result tensor
    :param device_id: the device id of the result tensor
    """
    api = _TensorAPI()._lib
    length = len(tensors)
    c_tensors = [t._tensor for t in tensors]
    c_tensors = (_Ctensor * length)(*c_tensors)
    result_tensor = LiteTensor()
    api.LITE_tensor_concat(
        cast(byref(c_tensors), POINTER(c_void_p)),
        length,
        dim,
        device_type,
        device_id,
        byref(result_tensor._tensor),
    )
    result_tensor.update()
    return result_tensor
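
# A minimal usage sketch (illustrative, not part of the original module):
# concatenating two CPU tensors along dim 0; shapes and values are
# hypothetical.
#
#   a = LiteTensor(LiteLayout((1, 3)))
#   a.set_data_by_copy([1.0, 2.0, 3.0])
#   b = LiteTensor(LiteLayout((1, 3)))
#   b.set_data_by_copy([4.0, 5.0, 6.0])
#   c = LiteTensorConcat([a, b], dim=0)   # result layout: (2, 3)
#   print(c.to_numpy())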

The MegEngine installation package bundles the CUDA environment needed to run code on the GPU, so there is no separate CPU and GPU build. If you want to run GPU programs, make sure the machine has GPU hardware and the driver is installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.