You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

tensor.py 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. # -*- coding: utf-8 -*-
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. from ctypes import *
  10. import numpy as np
  11. from .base import _Ctensor, _lib, _LiteCObjBase
  12. from .struct import LiteDataType, LiteDeviceType, LiteIOType, Structure
# A LiteLayout can describe at most this many dimensions.
MAX_DIM = 7

# LiteDataType enum value -> numpy scalar type.
_lite_type_to_nptypes = {
    LiteDataType.LITE_INT: np.int32,
    LiteDataType.LITE_FLOAT: np.float32,
    LiteDataType.LITE_UINT8: np.uint8,
    LiteDataType.LITE_INT8: np.int8,
    LiteDataType.LITE_INT16: np.int16,
    LiteDataType.LITE_UINT16: np.uint16,
    LiteDataType.LITE_HALF: np.float16,
}

# Inverse mapping: numpy scalar type -> LiteDataType enum value.
_nptype_to_lite_type = {val: key for key, val in _lite_type_to_nptypes.items()}

# numpy dtype object -> LiteDataType enum value (used when the caller passes
# a dtype-name string such as "float32"; see LiteLayout.__init__).
_str_nptypes_to_lite_nptypes = {
    np.dtype("int32"): LiteDataType.LITE_INT,
    np.dtype("float32"): LiteDataType.LITE_FLOAT,
    np.dtype("uint8"): LiteDataType.LITE_UINT8,
    np.dtype("int8"): LiteDataType.LITE_INT8,
    np.dtype("int16"): LiteDataType.LITE_INT16,
    np.dtype("uint16"): LiteDataType.LITE_UINT16,
    np.dtype("float16"): LiteDataType.LITE_HALF,
}

# ctypes scalar type -> LiteDataType enum value (used when the caller passes
# a numpy.dtype; see LiteLayout.__init__).
ctype_to_lite_dtypes = {
    c_int: LiteDataType.LITE_INT,
    # NOTE(review): c_uint maps to the *signed* LITE_INT — presumably because
    # no 32-bit unsigned LiteDataType exists; confirm this is intended.
    c_uint: LiteDataType.LITE_INT,
    c_float: LiteDataType.LITE_FLOAT,
    c_ubyte: LiteDataType.LITE_UINT8,
    c_byte: LiteDataType.LITE_INT8,
    c_short: LiteDataType.LITE_INT16,
    c_ushort: LiteDataType.LITE_UINT16,
}
  42. class LiteLayout(Structure):
  43. """
  44. the simple layout description
  45. """
  46. _fields_ = [
  47. ("shapes", c_size_t * MAX_DIM),
  48. ("ndim", c_size_t),
  49. ("data_type", c_int),
  50. ]
  51. def __init__(self, shape=None, dtype=None):
  52. if shape:
  53. shape = list(shape)
  54. assert len(shape) <= MAX_DIM, "Layout max dim is 7."
  55. self.shapes = (c_size_t * MAX_DIM)(*shape)
  56. self.ndim = len(shape)
  57. else:
  58. self.shapes = (c_size_t * MAX_DIM)()
  59. self.ndim = 0
  60. if not dtype:
  61. self.data_type = LiteDataType.LITE_FLOAT
  62. elif isinstance(dtype, LiteDataType):
  63. self.data_type = dtype
  64. elif type(dtype) == str:
  65. self.data_type = _str_nptypes_to_lite_nptypes[np.dtype(dtype)]
  66. elif isinstance(dtype, np.dtype):
  67. ctype = np.ctypeslib.as_ctypes_type(dtype)
  68. self.data_type = ctype_to_lite_dtypes[ctype]
  69. elif isinstance(dtype, type):
  70. self.data_type = _nptype_to_lite_type[dtype]
  71. else:
  72. raise RuntimeError("unkonw data type")
  73. def __repr__(self):
  74. data = {
  75. "shapes": list(self.shapes)[0 : self.ndim],
  76. "ndim": self.ndim,
  77. "data_type": _lite_type_to_nptypes[LiteDataType(self.data_type)],
  78. }
  79. return data.__repr__()
  80. class _LiteTensorDesc(Structure):
  81. """
  82. warpper of the MegEngine Tensor
  83. :is_pinned_host: when set, the storage memory of the tensor is pinned memory,
  84. this is used to Optimize the H2D or D2H memory copy, if the device or layout
  85. is not set, when copy form other device(CUDA) tensor, this tensor
  86. will be automatically set to pinned tensor
  87. """
  88. _fields_ = [
  89. ("is_pinned_host", c_int),
  90. ("layout", LiteLayout),
  91. ("device_type", c_int),
  92. ("device_id", c_int),
  93. ]
  94. def __init__(self):
  95. self.layout = LiteLayout()
  96. self.device_type = LiteDeviceType.LITE_CPU
  97. self.is_pinned_host = False
  98. self.device_id = 0
  99. def __repr__(self):
  100. data = {
  101. "is_pinned_host": self.is_pinned_host,
  102. "layout": LiteLayout(self.layout),
  103. "device_type": LiteDeviceType(self.device_type.value),
  104. "device_id": self.device_id,
  105. }
  106. return data.__repr__()
class _TensorAPI(_LiteCObjBase):
    """
    Declarations of the tensor-related C API: each entry is
    ``(symbol name in the lite shared library, list of argument ctypes)``.
    """

    _api_ = [
        ("LITE_make_tensor", [_LiteTensorDesc, POINTER(_Ctensor)]),
        ("LITE_set_tensor_layout", [_Ctensor, LiteLayout]),
        ("LITE_reset_tensor_memory", [_Ctensor, c_void_p, c_size_t]),
        ("LITE_reset_tensor", [_Ctensor, LiteLayout, c_void_p]),
        ("LITE_tensor_reshape", [_Ctensor, POINTER(c_int), c_int]),
        (
            "LITE_tensor_slice",
            [
                _Ctensor,
                POINTER(c_size_t),
                POINTER(c_size_t),
                POINTER(c_size_t),
                c_size_t,
                POINTER(_Ctensor),
            ],
        ),
        (
            "LITE_tensor_concat",
            [POINTER(_Ctensor), c_int, c_int, c_int, c_int, POINTER(_Ctensor),],
        ),
        ("LITE_tensor_fill_zero", [_Ctensor]),
        ("LITE_tensor_copy", [_Ctensor, _Ctensor]),
        ("LITE_tensor_share_memory_with", [_Ctensor, _Ctensor]),
        ("LITE_get_tensor_memory", [_Ctensor, POINTER(c_void_p)]),
        ("LITE_get_tensor_total_size_in_byte", [_Ctensor, POINTER(c_size_t)]),
        ("LITE_get_tensor_layout", [_Ctensor, POINTER(LiteLayout)]),
        ("LITE_get_tensor_device_type", [_Ctensor, POINTER(c_int)]),
        ("LITE_get_tensor_device_id", [_Ctensor, POINTER(c_int)]),
        ("LITE_destroy_tensor", [_Ctensor]),
        ("LITE_is_pinned_host", [_Ctensor, POINTER(c_int)]),
        # NOTE(review): LiteTensor.is_continue calls LITE_is_memory_continue,
        # which is not declared in this table — confirm the symbol is still
        # resolved through _lib.
    ]
class LiteTensor(object):
    """
    A tensor holding a block of data, backed by a C ``LiteTensor`` handle.
    """

    # shared ctypes handle to the loaded lite library (one per process)
    _api = _TensorAPI()._lib

    def __init__(
        self,
        layout=None,
        device_type=LiteDeviceType.LITE_CPU,
        device_id=0,
        is_pinned_host=False,
    ):
        """
        Create a tensor with the given layout, device type/id and
        pinned-host flag.
        """
        self._tensor = _Ctensor()
        if layout:
            self._layout = layout
        else:
            self._layout = LiteLayout()
        self._device_type = device_type
        self._device_id = device_id
        self._is_pinned_host = is_pinned_host

        tensor_desc = _LiteTensorDesc()
        tensor_desc.layout = self._layout
        tensor_desc.device_type = device_type
        tensor_desc.device_id = device_id
        tensor_desc.is_pinned_host = is_pinned_host
        self._api.LITE_make_tensor(tensor_desc, byref(self._tensor))

    def __del__(self):
        # release the underlying C tensor when the Python wrapper dies
        self._api.LITE_destroy_tensor(self._tensor)

    def fill_zero(self):
        """
        Fill the tensor's buffer memory with zeros.
        """
        self._api.LITE_tensor_fill_zero(self._tensor)
        self.update()

    def share_memory_with(self, src_tensor):
        """
        Share the same memory with ``src_tensor``; this tensor's own
        memory will be freed.
        """
        assert isinstance(src_tensor, LiteTensor)
        self._api.LITE_tensor_share_memory_with(self._tensor, src_tensor._tensor)
        self.update()

    @property
    def layout(self):
        # refresh the cached layout from the C side before returning it
        self._api.LITE_get_tensor_layout(self._tensor, byref(self._layout))
        return self._layout

    @layout.setter
    def layout(self, layout):
        assert isinstance(layout, LiteLayout)
        self._layout = layout
        self._api.LITE_set_tensor_layout(self._tensor, layout)

    @property
    def is_pinned_host(self):
        """
        Whether the tensor memory is pinned (page-locked) host memory.
        """
        pinned = c_int()
        self._api.LITE_is_pinned_host(self._tensor, byref(pinned))
        # NOTE(review): the raw c_int object (not pinned.value) is cached
        # here; bool() of a ctypes c_int follows its numeric value, so the
        # return below is still correct.
        self._is_pinned_host = pinned
        return bool(self._is_pinned_host)

    @property
    def device_type(self):
        """
        Get the device type of the tensor.
        """
        device_type = c_int()
        self._api.LITE_get_tensor_device_type(self._tensor, byref(device_type))
        # cached as the raw c_int; __repr__ reads self._device_type.value
        self._device_type = device_type
        return LiteDeviceType(device_type.value)

    @property
    def device_id(self):
        """
        Get the device id of the tensor.
        """
        device_id = c_int()
        self._api.LITE_get_tensor_device_id(self._tensor, byref(device_id))
        self._device_id = device_id.value
        return device_id.value

    @property
    def is_continue(self):
        """
        Whether the tensor memory is contiguous.
        """
        is_continue = c_int()
        # NOTE(review): LITE_is_memory_continue is not declared in
        # _TensorAPI._api_ — confirm the symbol is resolved by _lib.
        self._api.LITE_is_memory_continue(self._tensor, byref(is_continue))
        return bool(is_continue.value)

    @property
    def nbytes(self):
        """
        Get the length of the tensor memory in bytes.
        """
        self.update()
        length = c_size_t()
        self._api.LITE_get_tensor_total_size_in_byte(self._tensor, byref(length))
        return length.value

    def update(self):
        """
        Refresh the cached members from the C side; used automatically
        after operations such as slice and share.
        """
        pinned = c_int()
        self._api.LITE_is_pinned_host(self._tensor, byref(pinned))
        self._is_pinned_host = pinned
        device_type = c_int()
        self._api.LITE_get_tensor_device_type(self._tensor, byref(device_type))
        self._device_type = device_type
        self._api.LITE_get_tensor_layout(self._tensor, byref(self._layout))

    def copy_from(self, src_tensor):
        """
        Copy memory from ``src_tensor`` into this tensor.
        """
        assert isinstance(src_tensor, LiteTensor)
        self._api.LITE_tensor_copy(self._tensor, src_tensor._tensor)
        self.update()

    def reshape(self, shape):
        """
        Reshape the tensor without changing its data, only the shape.

        :param shape: int array of the destination shape
        """
        shape = list(shape)
        length = len(shape)
        c_shape = (c_int * length)(*shape)
        self._api.LITE_tensor_reshape(self._tensor, c_shape, length)
        self.update()

    def slice(self, start, end, step=None):
        """
        Slice the tensor with the given start, end and step.

        :param start: slice begin index of each dim
        :param end: slice end index of each dim
        :param step: slice step of each dim (defaults to 1 for every dim)
        """
        start = list(start)
        end = list(end)
        length = len(start)
        assert length == len(end), "slice with different length of start and end."
        if step:
            assert length == len(step), "slice with different length of start and step."
            step = list(step)
        else:
            step = [1 for i in range(length)]
        c_start = (c_size_t * length)(*start)
        c_end = (c_size_t * length)(*end)
        c_step = (c_size_t * length)(*step)
        slice_tensor = LiteTensor()
        self._api.LITE_tensor_slice(
            self._tensor, c_start, c_end, c_step, length, byref(slice_tensor._tensor)
        )
        slice_tensor.update()
        return slice_tensor

    def get_ctypes_memory(self):
        """
        Get the memory of the tensor; returns a ``c_void_p`` pointing at
        the tensor memory.
        """
        self.update()
        mem = c_void_p()
        self._api.LITE_get_tensor_memory(self._tensor, byref(mem))
        return mem

    def set_data_by_share(self, data, length=0, layout=None):
        """
        Share external data into the tensor without copying.

        :param data: the data shared into the tensor; a numpy.ndarray or
            a ctypes pointer
        :param length: byte length of ``data`` when it is a ctypes pointer
        :param layout: optional new layout to apply to the tensor
        """
        self.update()
        if isinstance(data, np.ndarray):
            assert (
                self.is_continue
            ), "set_data_by_share can only apply in continue tensor."
            assert (
                self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
            ), "set_data_by_share can only apply in cpu tensor or pinned tensor."
            np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
            c_type = np.ctypeslib.as_ctypes_type(np_type)
            # adopt the ndarray's shape when the byte sizes disagree
            if self.nbytes != data.nbytes:
                self.layout = LiteLayout(data.shape, ctype_to_lite_dtypes[c_type])
            # keep a reference so the shared ndarray is not garbage collected
            self._shared_data = data
            data = data.ctypes.data_as(POINTER(c_type))
        if layout is not None:
            self.layout = layout
        else:
            assert length == 0 or length == self.nbytes, "the data length is not match."
        self._api.LITE_reset_tensor_memory(self._tensor, data, self.nbytes)

    def set_data_by_copy(self, data, data_length=0, layout=None):
        """
        Copy external data into the tensor.

        :param data: the data to copy into the tensor; a list, a
            numpy.ndarray, or ctypes data with a length
        :param data_length: byte length of ``data`` when it is ctypes data
        :param layout: optional new layout to apply before copying
        """
        self.update()
        if layout is not None:
            self.layout = layout

        assert self.is_continue, "set_data_by_copy can only apply in continue tensor."
        assert (
            self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
        ), "set_data_by_copy can only apply in cpu tensor or pinned tensor."

        np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
        c_type = np.ctypeslib.as_ctypes_type(np_type)

        tensor_memory = c_void_p()

        if type(data) == list:
            length = len(data)
            self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
            tensor_length = self.nbytes
            assert (
                length * sizeof(c_type) <= tensor_length
            ), "the length of input data to set to the tensor is too large."
            # marshal the list through a ctypes array, then copy into place
            arr = (c_type * length)(*data)
            memmove(tensor_memory, arr, sizeof(c_type) * length)

        elif type(data) == np.ndarray:
            # adopt the ndarray's shape/dtype when the byte sizes disagree
            if self.nbytes != data.nbytes:
                self.layout = LiteLayout(data.shape, data.dtype)
            arr = data.ctypes.data_as(POINTER(c_type))
            self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
            assert self.nbytes == data.nbytes
            memmove(tensor_memory, arr, self.nbytes)

        else:
            # raw ctypes data: the caller must tell us how many bytes to copy
            assert (
                data_length == self.nbytes or layout is not None
            ), "when input data is ctypes, the length of input data or layout must set"
            self._api.LITE_get_tensor_memory(self._tensor, byref(tensor_memory))
            memmove(tensor_memory, data, data_length)

    def to_numpy(self):
        """
        Get the tensor buffer as a numpy.ndarray (always a copy).
        """
        self.update()
        if self.nbytes <= 0:
            np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
            return np.array([], dtype=np_type)
        if self.is_continue and (
            self.is_pinned_host or self.device_type == LiteDeviceType.LITE_CPU
        ):
            # host-accessible and contiguous: copy the raw bytes directly
            ptr = c_void_p()
            self._api.LITE_get_tensor_memory(self._tensor, byref(ptr))
            np_type = _lite_type_to_nptypes[LiteDataType(self._layout.data_type)]
            shape = [self._layout.shapes[i] for i in range(self._layout.ndim)]
            np_arr = np.zeros(shape, np_type)
            if np_arr.nbytes:
                memmove(np_arr.ctypes.data_as(c_void_p), ptr, np_arr.nbytes)
            return np_arr
        else:
            # device or non-contiguous tensor: stage through a CPU copy
            tmp_tensor = LiteTensor(self.layout)
            tmp_tensor.copy_from(self)
            return tmp_tensor.to_numpy()

    def __repr__(self):
        self.update()
        # update() caches _device_type/_is_pinned_host as raw c_int objects,
        # hence the .value / bool() conversions here
        data = {
            "layout": self._layout,
            "device_type": LiteDeviceType(self._device_type.value),
            "device_id": int(self.device_id),
            "is_pinned_host": bool(self._is_pinned_host),
        }
        return data.__repr__()
  396. def LiteTensorConcat(
  397. tensors, dim, device_type=LiteDeviceType.LITE_DEVICE_DEFAULT, device_id=-1
  398. ):
  399. """
  400. concat tensor in input dim to one tensor
  401. dim : the dim to act concat
  402. device_type: the result tensor device type
  403. device_id: the result tensor device id
  404. """
  405. api = _TensorAPI()._lib
  406. length = len(tensors)
  407. c_tensors = [t._tensor for t in tensors]
  408. c_tensors = (_Ctensor * length)(*c_tensors)
  409. result_tensor = LiteTensor()
  410. api.LITE_tensor_concat(
  411. cast(byref(c_tensors), POINTER(c_void_p)),
  412. length,
  413. dim,
  414. device_type,
  415. device_id,
  416. byref(result_tensor._tensor),
  417. )
  418. result_tensor.update()
  419. return result_tensor

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台