You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

tensor.py 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. from ctypes import *
  10. import numpy as np
  11. from .base import _Ctensor, _lib, _LiteCObjBase
  12. from .struct import LiteDataType, LiteDeviceType, LiteIOType, Structure
# A layout may describe at most this many dimensions.
MAX_DIM = 7

# Lite data-type enum -> numpy scalar type.
_lite_type_to_nptypes = {
    LiteDataType.LITE_INT: np.int32,
    LiteDataType.LITE_FLOAT: np.float32,
    LiteDataType.LITE_UINT8: np.uint8,
    LiteDataType.LITE_INT8: np.int8,
    LiteDataType.LITE_INT16: np.int16,
    LiteDataType.LITE_HALF: np.float16,
}

# Inverse mapping: numpy scalar type -> lite data-type enum.
_nptype_to_lite_type = {val: key for key, val in _lite_type_to_nptypes.items()}

# numpy dtype object (keyed by dtype name) -> lite data-type enum;
# used when the caller passes the dtype as a string such as "float32".
_str_nptypes_to_lite_nptypes = {
    np.dtype("int32"): LiteDataType.LITE_INT,
    np.dtype("float32"): LiteDataType.LITE_FLOAT,
    np.dtype("uint8"): LiteDataType.LITE_UINT8,
    np.dtype("int8"): LiteDataType.LITE_INT8,
    np.dtype("int16"): LiteDataType.LITE_INT16,
    np.dtype("float16"): LiteDataType.LITE_HALF,
}

# ctypes scalar type -> lite data-type enum.
# NOTE(review): c_uint and c_ushort map onto the *signed* lite enums
# (LITE_INT / LITE_INT16) -- presumably because lite has no unsigned
# 32/16-bit types; confirm against the C API before relying on it.
ctype_to_lite_dtypes = {
    c_int: LiteDataType.LITE_INT,
    c_uint: LiteDataType.LITE_INT,
    c_float: LiteDataType.LITE_FLOAT,
    c_ubyte: LiteDataType.LITE_UINT8,
    c_byte: LiteDataType.LITE_INT8,
    c_short: LiteDataType.LITE_INT16,
    c_ushort: LiteDataType.LITE_INT16,
}
  40. class LiteLayout(Structure):
  41. """
  42. the simple layout description
  43. """
  44. _fields_ = [
  45. ("shapes", c_size_t * MAX_DIM),
  46. ("ndim", c_size_t),
  47. ("data_type", c_int),
  48. ]
  49. def __init__(self, shape=None, dtype=None):
  50. if shape:
  51. shape = list(shape)
  52. assert len(shape) <= MAX_DIM, "Layout max dim is 7."
  53. self.shapes = (c_size_t * MAX_DIM)(*shape)
  54. self.ndim = len(shape)
  55. else:
  56. self.shapes = (c_size_t * MAX_DIM)()
  57. self.ndim = 0
  58. if not dtype:
  59. self.data_type = LiteDataType.LITE_FLOAT
  60. elif isinstance(dtype, LiteDataType):
  61. self.data_type = dtype
  62. elif type(dtype) == str:
  63. self.data_type = _str_nptypes_to_lite_nptypes[np.dtype(dtype)]
  64. elif isinstance(dtype, np.dtype):
  65. ctype = np.ctypeslib.as_ctypes_type(dtype)
  66. self.data_type = ctype_to_lite_dtypes[ctype]
  67. elif isinstance(dtype, type):
  68. self.data_type = _nptype_to_lite_type[dtype]
  69. else:
  70. raise RuntimeError("unkonw data type")
  71. def __repr__(self):
  72. data = {
  73. "shapes": list(self.shapes),
  74. "ndim": self.ndim,
  75. "data_type": _lite_type_to_nptypes[LiteDataType(self.data_type)],
  76. }
  77. return data.__repr__()
  78. class _LiteTensorDesc(Structure):
  79. """
  80. warpper of the MegEngine Tensor
  81. :is_pinned_host: when set, the storage memory of the tensor is pinned memory,
  82. this is used to Optimize the H2D or D2H memory copy, if the device or layout
  83. is not set, when copy form other device(CUDA) tensor, this tensor
  84. will be automatically set to pinned tensor
  85. """
  86. _fields_ = [
  87. ("is_pinned_host", c_int),
  88. ("layout", LiteLayout),
  89. ("device_type", c_int),
  90. ("device_id", c_int),
  91. ]
  92. def __init__(self):
  93. self.layout = LiteLayout()
  94. self.device_type = LiteDeviceType.LITE_CPU
  95. self.is_pinned_host = False
  96. self.device_id = 0
  97. def __repr__(self):
  98. data = {
  99. "is_pinned_host": self.is_pinned_host,
  100. "layout": LiteLayout(self.layout),
  101. "device_type": LiteDeviceType(self.device_type.value),
  102. "device_id": self.device_id,
  103. }
  104. return data.__repr__()
  105. class _TensorAPI(_LiteCObjBase):
  106. """
  107. get the api from the lib
  108. """
  109. _api_ = [
  110. ("LITE_make_tensor", [_LiteTensorDesc, POINTER(_Ctensor)]),
  111. ("LITE_set_tensor_layout", [_Ctensor, LiteLayout]),
  112. ("LITE_reset_tensor_memory", [_Ctensor, c_void_p, c_size_t]),
  113. ("LITE_reset_tensor", [_Ctensor, LiteLayout, c_void_p]),
  114. ("LITE_tensor_reshape", [_Ctensor, POINTER(c_int), c_int]),
  115. (
  116. "LITE_tensor_slice",
  117. [
  118. _Ctensor,
  119. POINTER(c_size_t),
  120. POINTER(c_size_t),
  121. POINTER(c_size_t),
  122. c_size_t,
  123. POINTER(_Ctensor),
  124. ],
  125. ),
  126. (
  127. "LITE_tensor_concat",
  128. [POINTER(_Ctensor), c_int, c_int, c_int, c_int, POINTER(_Ctensor),],
  129. ),
  130. ("LITE_tensor_fill_zero", [_Ctensor]),
  131. ("LITE_tensor_copy", [_Ctensor, _Ctensor]),
  132. ("LITE_tensor_share_memory_with", [_Ctensor, _Ctensor]),
  133. ("LITE_get_tensor_memory", [_Ctensor, POINTER(c_void_p)]),
  134. ("LITE_get_tensor_total_size_in_byte", [_Ctensor, POINTER(c_size_t)]),
  135. ("LITE_get_tensor_layout", [_Ctensor, POINTER(LiteLayout)]),
  136. ("LITE_get_tensor_device_type", [_Ctensor, POINTER(c_int)]),
  137. ("LITE_get_tensor_device_id", [_Ctensor, POINTER(c_int)]),
  138. ("LITE_destroy_tensor", [_Ctensor]),
  139. ("LITE_is_pinned_host", [_Ctensor, POINTER(c_int)]),
  140. ]
  141. class LiteTensor(object):
  142. """
  143. the tensor to hold a block of data
  144. """
  145. _api = _TensorAPI()._lib
  146. def __init__(
  147. self,
  148. layout=None,
  149. device_type=LiteDeviceType.LITE_CPU,
  150. device_id=0,
  151. is_pinned_host=False,
  152. ):
  153. """
  154. create a Tensor with layout, device, is_pinned_host param
  155. """
  156. self._tensor = _Ctensor()
  157. if layout:
  158. self._layout = layout
  159. else:
  160. self._layout = LiteLayout()
  161. self._device_type = device_type
  162. self._device_id = device_id
  163. self._is_pinned_host = is_pinned_host
  164. tensor_desc = _LiteTensorDesc()
  165. tensor_desc.layout = self._layout
  166. tensor_desc.device_type = device_type
  167. tensor_desc.device_id = device_id
  168. tensor_desc.is_pinned_host = is_pinned_host
  169. self._api.LITE_make_tensor(tensor_desc, byref(self._tensor))
  170. def __del__(self):
  171. self._api.LITE_destroy_tensor(self._tensor)
  172. def fill_zero(self):
  173. """
  174. fill the buffer memory with zero
  175. """
  176. self._api.LITE_tensor_fill_zero(self._tensor)
  177. self.update()
  178. def share_memory_with(self, src_tensor):
  179. """
  180. share the same memory with the src_tensor, the self memory will be freed
  181. """
  182. assert isinstance(src_tensor, LiteTensor)
  183. self._api.LITE_tensor_share_memory_with(self._tensor, src_tensor._tensor)
  184. self.update()
  185. @property
  186. def layout(self):
  187. self._api.LITE_get_tensor_layout(self._tensor, byref(self._layout))
  188. return self._layout
  189. @layout.setter
  190. def layout(self, layout):
  191. assert isinstance(layout, LiteLayout)
  192. self._layout = layout
  193. self._api.LITE_set_tensor_layout(self._tensor, layout)
  194. @property
  195. def is_pinned_host(self):
  196. """
  197. whether the tensor is pinned tensor
  198. """
  199. pinned = c_int()
  200. self._api.LITE_is_pinned_host(self._tensor, byref(pinned))
  201. self._is_pinned_host = pinned
  202. return bool(self._is_pinned_host)
  203. @property
  204. def device_type(self):
  205. """
  206. get device of the tensor
  207. """
  208. device_type = c_int()
  209. self._api.LITE_get_tensor_device_type(self._tensor, byref(device_type))
  210. self._device_type = device_type
  211. return LiteDeviceType(device_type.value)
  212. @property
  213. def device_id(self):
  214. """
  215. get device id of the tensor
  216. """
  217. device_id = c_int()
  218. self._api.LITE_get_tensor_device_id(self._tensor, byref(device_id))
  219. self._device_id = device_id.value
  220. return device_id.value
  221. @property
  222. def is_continue(self):
  223. """
  224. whether the tensor memory is continue
  225. """
  226. is_continue = c_int()
  227. self._api.LITE_is_memory_continue(self._tensor, byref(is_continue))
  228. return bool(is_continue.value)
  229. @property
  230. def nbytes(self):
  231. """
  232. get the length of the meomry in byte
  233. """
  234. self.update()
  235. length = c_size_t()
  236. self._api.LITE_get_tensor_total_size_in_byte(self._tensor, byref(length))
  237. return length.value
  238. def update(self):
  239. """
  240. update the member from C, this will auto used after slice, share
  241. """
  242. pinned = c_int()
  243. self._api.LITE_is_pinned_host(self._tensor, byref(pinned))
  244. self._is_pinned_host = pinned
  245. device_type = c_int()
  246. self._api.LITE_get_tensor_device_type(self._tensor, byref(device_type))
  247. self._device_type = device_type
  248. self._api.LITE_get_tensor_layout(self._tensor, byref(self._layout))
  249. def copy_from(self, src_tensor):
  250. """
  251. copy memory form the src_tensor
  252. """
  253. assert isinstance(src_tensor, LiteTensor)
  254. self._api.LITE_tensor_copy(self._tensor, src_tensor._tensor)
  255. self.update()
  256. def reshape(self, shape):
  257. """
  258. reshape the tensor with data not change, only change the shape
  259. :param shape: int arrary of dst_shape
  260. """
  261. shape = list(shape)
  262. length = len(shape)
  263. c_shape = (c_int * length)(*shape)
  264. self._api.LITE_tensor_reshape(self._tensor, c_shape, length)
  265. self.update()
  266. def slice(self, start, end, step=None):
  267. """
  268. slice the tensor with gaven start, end, step
  269. :param start: silce begin index of each dim
  270. :param end: silce end index of each dim
  271. :param step: silce step of each dim
  272. """
  273. start = list(start)
  274. end = list(end)
  275. length = len(start)
  276. assert length == len(end), "slice with different length of start and end."
  277. if step:
  278. assert length == len(step), "slice with different length of start and step."
  279. step = list(step)
  280. else:
  281. step = [1 for i in range(length)]
  282. c_start = (c_size_t * length)(*start)
  283. c_end = (c_size_t * length)(*end)
  284. c_step = (c_size_t * length)(*step)
  285. slice_tensor = LiteTensor()
  286. self._api.LITE_tensor_slice(
  287. self._tensor, c_start, c_end, c_step, length, byref(slice_tensor._tensor)
  288. )
  289. slice_tensor.update()
  290. return slice_tensor
  291. def get_ctypes_memory(self):
  292. """
  293. get the memory of the tensor, return c_void_p of the tensor memory
  294. """
  295. self.update()
  296. mem = c_void_p()
  297. self._api.LITE_get_tensor_memory(self._tensor, byref(mem))
  298. return mem
  299. def set_data_by_share(self, data, length=0, layout=None):
  300. """
  301. share the data to the tensor
  302. param data: the data will shared to the tensor, it should be a
  303. numpy.ndarray or ctypes data
  304. """
  305. self.update()
  306. if isinstance(data, np.ndarray):
  307. assert (
  308. self.is_continue
  309. ), "set_data_by_share can only apply in continue tensor."
  310. assert (
  311. self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
  312. ), "set_data_by_share can only apply in cpu tensor or pinned tensor."
  313. np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
  314. c_type = np.ctypeslib.as_ctypes_type(np_type)
  315. if self.nbytes != data.nbytes:
  316. self.layout = LiteLayout(data.shape, ctype_to_lite_dtypes[c_type])
  317. self._shared_data = data
  318. data = data.ctypes.data_as(POINTER(c_type))
  319. if layout is not None:
  320. self.layout = layout
  321. else:
  322. assert length == 0 or length == self.nbytes, "the data length is not match."
  323. self._api.LITE_reset_tensor_memory(self._tensor, data, self.nbytes)
  324. def set_data_by_copy(self, data, data_length=0, layout=None):
  325. """
  326. copy the data to the tensor
  327. param data: the data to copy to tensor, it should be list,
  328. numpy.ndarraya or ctypes with length
  329. """
  330. self.update()
  331. if layout is not None:
  332. self.layout = layout
  333. assert self.is_continue, "set_data_by_copy can only apply in continue tensor."
  334. assert (
  335. self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
  336. ), "set_data_by_copy can only apply in cpu tensor or pinned tensor."
  337. np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
  338. c_type = np.ctypeslib.as_ctypes_type(np_type)
  339. tensor_memory = c_void_p()
  340. if type(data) == list:
  341. length = len(data)
  342. self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
  343. tensor_length = self.nbytes
  344. assert (
  345. length * sizeof(c_type) <= tensor_length
  346. ), "the length of input data to set to the tensor is too large."
  347. arr = (c_type * length)(*data)
  348. memmove(tensor_memory, arr, sizeof(c_type) * length)
  349. elif type(data) == np.ndarray:
  350. if self.nbytes != data.nbytes:
  351. self.layout = LiteLayout(data.shape, data.dtype)
  352. arr = data.ctypes.data_as(POINTER(c_type))
  353. self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
  354. assert self.nbytes == data.nbytes
  355. memmove(tensor_memory, arr, self.nbytes)
  356. else:
  357. assert (
  358. data_length == self.nbytes or layout is not None
  359. ), "when input data is ctypes, the length of input data or layout must set"
  360. self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
  361. memmove(tensor_memory, data, data_length)
  362. def to_numpy(self):
  363. """
  364. get the buffer of the tensor
  365. """
  366. self.update()
  367. if self.nbytes <= 0:
  368. return np.array([])
  369. if self.is_continue and (
  370. self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
  371. ):
  372. ptr = c_void_p()
  373. self._api.LITE_get_tensor_memory(self._tensor, byref(ptr))
  374. np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
  375. shape = [self._layout.shapes[i] for i in range(self._layout.ndim)]
  376. np_arr = np.zeros(shape, np_type)
  377. if np_arr.nbytes:
  378. memmove(np_arr.ctypes.data_as(c_void_p), ptr, np_arr.nbytes)
  379. return np_arr
  380. else:
  381. tmp_tensor = LiteTensor(self.layout)
  382. tmp_tensor.copy_from(self)
  383. return tmp_tensor.to_numpy()
  384. def __repr__(self):
  385. self.update()
  386. data = {
  387. "layout": self._layout,
  388. "device_type": LiteDeviceType(self._device_type.value),
  389. "device_id": int(self.device_id),
  390. "is_pinned_host": bool(self._is_pinned_host),
  391. }
  392. return data.__repr__()
  393. def LiteTensorConcat(
  394. tensors, dim, device_type=LiteDeviceType.LITE_DEVICE_DEFAULT, device_id=-1
  395. ):
  396. """
  397. concat tensor in input dim to one tensor
  398. dim : the dim to act concat
  399. device_type: the result tensor device type
  400. device_id: the result tensor device id
  401. """
  402. api = _TensorAPI()._lib
  403. length = len(tensors)
  404. c_tensors = [t._tensor for t in tensors]
  405. c_tensors = (_Ctensor * length)(*c_tensors)
  406. result_tensor = LiteTensor()
  407. api.LITE_tensor_concat(
  408. cast(byref(c_tensors), POINTER(c_void_p)),
  409. length,
  410. dim,
  411. device_type,
  412. device_id,
  413. byref(result_tensor._tensor),
  414. )
  415. result_tensor.update()
  416. return result_tensor

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台