@@ -21,3 +21,6 @@ ci/resource/prof/model_with_err_assert.mdl filter=lfs diff=lfs merge=lfs -text | |||
ci/resource/prof/test_mge.mge filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/ax_models/64-58063ce2.axe filter=lfs diff=lfs merge=lfs -text | |||
imperative/python/test/unit/module/MagicMindRuntimeOprTest.GraphShapeMutable.mlu filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/ax_data_input.npy filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/ax_data_output.npy filter=lfs diff=lfs merge=lfs -text | |||
lite/test/resource/lite/ax_model.mge filter=lfs diff=lfs merge=lfs -text |
@@ -29,7 +29,6 @@ jobs: | |||
uses: actions/checkout@v2 | |||
- name: Checkout submodules | |||
run: | | |||
apt update&&apt install ninja-build | |||
./third_party/prepare.sh | |||
./third_party/install-mkl.sh | |||
- name: Build MegEngine | |||
@@ -58,7 +57,6 @@ jobs: | |||
uses: actions/checkout@v2 | |||
- name: Checkout submodules | |||
run: | | |||
apt update&&apt install ninja-build | |||
./third_party/prepare.sh | |||
./third_party/install-mkl.sh | |||
- name: Build MegEngine | |||
@@ -12,7 +12,7 @@ MegEngine is a fast, scalable and easy-to-use deep learning framework, with auto | |||
## Installation | |||
**NOTE:** MegEngine now supports Python installation on Linux-64bit/Windows-64bit/MacOS(CPU-Only)-10.14+/Android 7+(CPU-Only) platforms with Python from 3.5 to 3.8. On Windows 10 you can either install the Linux distribution through [Windows Subsystem for Linux (WSL)](https://docs.microsoft.com/en-us/windows/wsl) or install the Windows distribution directly. Many other platforms are supported for inference. | |||
**NOTE:** MegEngine now supports Python installation on Linux-64bit/Windows-64bit/MacOS(CPU-Only)-10.14+ platforms with Python from 3.5 to 3.8. On Windows 10 you can either install the Linux distribution through [Windows Subsystem for Linux (WSL)](https://docs.microsoft.com/en-us/windows/wsl) or install the Windows distribution directly. Many other platforms are supported for inference. | |||
### Binaries | |||
@@ -13,7 +13,7 @@ MegEngine 是一个快速、可拓展、易于使用且支持自动求导的深 | |||
## 安装说明 | |||
**注意:** MegEngine 现在支持在 Linux-64bit/Windows-64bit/macos-10.14/Android 7+ 及其以上 (MacOS/Android只支持cpu) 等平台上安装 Python 包,支持Python3.5 到 Python3.8。对于 Windows 10 用户,可以通过安装 [Windows Subsystem for Linux (WSL)](https://docs.microsoft.com/en-us/windows/wsl) 进行体验,同时我们也原生支持Windows。MegEngine 也支持在很多其它平台上进行推理运算。 | |||
**注意:** MegEngine 现在支持在 Linux-64bit/Windows-64bit/macos-10.14及其以上 (MacOS只支持cpu) 等平台上安装 Python 包,支持Python3.5 到 Python3.8。对于 Windows 10 用户,可以通过安装 [Windows Subsystem for Linux (WSL)](https://docs.microsoft.com/en-us/windows/wsl) 进行体验,同时我们也原生支持Windows。MegEngine 也支持在很多其它平台上进行推理运算。 | |||
### 通过包管理器安装 | |||
@@ -26,8 +26,8 @@ python3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html | |||
## 通过源码编译安装 | |||
* CMake 编译细节请参考 [BUILD_README.md](scripts/cmake-build/BUILD_README.md) | |||
* Python 绑定编译细节请参考 [BUILD_PYTHON_WHL_README.md](scripts/whl/BUILD_PYTHON_WHL_README.md) | |||
* CMake编译细节请参考 [BUILD_README.md](scripts/cmake-build/BUILD_README.md) | |||
* Python绑定编译细节请参考 [BUILD_PYTHON_WHL_README.md](scripts/whl/BUILD_PYTHON_WHL_README.md) | |||
## 如何参与贡献 | |||
@@ -27,8 +27,7 @@ function build() { | |||
-DMGE_WITH_DISTRIBUTED=${DMGE_WITH_DISTRIBUTED} \ | |||
-DMGE_WITH_CUDA=${DMGE_WITH_CUDA} \ | |||
-DMGE_WITH_TEST=ON \ | |||
-DCMAKE_BUILD_TYPE=RelWithDebInfo \ | |||
-DMGE_WITH_CUSTOM_OP=ON | |||
-DCMAKE_BUILD_TYPE=RelWithDebInfo | |||
make -j$(($(nproc) * 2)) -I ${build_dir} | |||
make develop | |||
popd >/dev/null | |||
@@ -363,6 +363,58 @@ static inline void trans_8x4_u16( | |||
vst1q_u16(dst_ptr + 3 * dst_step, row_3); | |||
} | |||
static inline void trans_8x3_u16( | |||
const void* src, void* dst, const size_t src_step, const size_t dst_step) { | |||
uint16_t* src_ptr = (uint16_t*)src; | |||
uint16_t* dst_ptr = (uint16_t*)dst; | |||
uint16x4_t src0 = vld1_u16(src_ptr + 0 * src_step); // A0A1A2A3 | |||
uint16x4_t src1 = vld1_u16(src_ptr + 1 * src_step); // B0B1B2B3 | |||
uint16x4_t src2 = vld1_u16(src_ptr + 2 * src_step); // C0C1C2C3 | |||
uint16x4_t src3 = vld1_u16(src_ptr + 3 * src_step); // D0D1D2D3 | |||
uint16x4_t src4 = vld1_u16(src_ptr + 4 * src_step); // E0E1E2E3 | |||
uint16x4_t src5 = vld1_u16(src_ptr + 5 * src_step); // F0F1F2F3 | |||
uint16x4_t src6 = vld1_u16(src_ptr + 6 * src_step); // G0G1G2G3 | |||
// H0H1H2 | |||
uint16x4_t src7 = | |||
vreinterpret_u16_u32(vld1_dup_u32((uint32_t*)(src_ptr + 7 * src_step))); | |||
src7 = vld1_lane_u16(src_ptr + 7 * src_step + 2, src7, 2); | |||
uint16x4_t ab_low = vzip1_u16(src0, src1); // A0B0A1B1 | |||
uint16x4_t ab_high = vzip2_u16(src0, src1); // A2B2A3B3 | |||
uint16x4_t cd_low = vzip1_u16(src2, src3); // C0D0C1D1 | |||
uint16x4_t cd_high = vzip2_u16(src2, src3); // C2D2C3D3 | |||
uint16x4_t ef_low = vzip1_u16(src4, src5); // E0F0E1F1 | |||
uint16x4_t ef_high = vzip2_u16(src4, src5); // E2F2E3F3 | |||
uint16x4_t gh_low = vzip1_u16(src6, src7); // G0H0G1H1 | |||
uint16x4_t gh_high = vzip2_u16(src6, src7); // G2H2G3 | |||
uint16x4_t abcd_0 = vreinterpret_u16_u32(vzip1_u32( | |||
vreinterpret_u32_u16(ab_low), | |||
vreinterpret_u32_u16(cd_low))); // A0B0C0D0 | |||
uint16x4_t abcd_1 = vreinterpret_u16_u32(vzip2_u32( | |||
vreinterpret_u32_u16(ab_low), | |||
vreinterpret_u32_u16(cd_low))); // A1B1C1D1 | |||
uint16x4_t abcd_2 = vreinterpret_u16_u32(vzip1_u32( | |||
vreinterpret_u32_u16(ab_high), | |||
vreinterpret_u32_u16(cd_high))); // A2B2C2D2 | |||
uint16x4_t efgh_0 = vreinterpret_u16_u32(vzip1_u32( | |||
vreinterpret_u32_u16(ef_low), | |||
vreinterpret_u32_u16(gh_low))); // E0F0G0H0 | |||
uint16x4_t efgh_1 = vreinterpret_u16_u32(vzip2_u32( | |||
vreinterpret_u32_u16(ef_low), | |||
vreinterpret_u32_u16(gh_low))); // E1F1G1H1 | |||
uint16x4_t efgh_2 = vreinterpret_u16_u32(vzip1_u32( | |||
vreinterpret_u32_u16(ef_high), | |||
vreinterpret_u32_u16(gh_high))); // E2F2G2H2 | |||
uint16x8_t row_0 = vcombine_u16(abcd_0, efgh_0); | |||
uint16x8_t row_1 = vcombine_u16(abcd_1, efgh_1); | |||
uint16x8_t row_2 = vcombine_u16(abcd_2, efgh_2); | |||
vst1q_u16(dst_ptr + 0 * dst_step, row_0); | |||
vst1q_u16(dst_ptr + 1 * dst_step, row_1); | |||
vst1q_u16(dst_ptr + 2 * dst_step, row_2); | |||
} | |||
} // anonymous namespace | |||
namespace megdnn { | |||
@@ -410,6 +462,8 @@ void transpose_block<Transpose2Byte>( | |||
const size_t dst_stride, size_t block_h, size_t block_w) { | |||
if (block_h == 8 && block_w == 4) { | |||
trans_8x4_u16(src, dst, src_stride, dst_stride); | |||
} else if (block_h == 8 && block_w == 3) { | |||
trans_8x3_u16(src, dst, src_stride, dst_stride); | |||
} else { | |||
transpose_block_fallback(src, dst, src_stride, dst_stride, block_h, block_w); | |||
} | |||
@@ -40,6 +40,9 @@ TEST_F(AARCH64, Relayout) { | |||
TensorLayout dst({1, 54, 112, 256}, {1548288, 28672, 256, 1}, dtype); | |||
checker.execl({src, dst}); | |||
} | |||
TensorLayout src_4_3({1, 3, 112, 256}, {3, 1, 1024, 4}, dtype::Uint16()); | |||
TensorLayout dst_4_3({1, 3, 112, 256}, {86016, 28672, 256, 1}, dtype::Uint16()); | |||
checker.execl({src_4_3, dst_4_3}); | |||
} | |||
TEST_F(AARCH64, RelayoutNonContig) { | |||
@@ -50,7 +50,9 @@ _sh = _stream_helper() | |||
def _valid_device(inp): | |||
if isinstance(inp, str) and re.match("^([cxg]pu|rocm)(\d+|\d+:\d+|x)$", inp): | |||
if isinstance(inp, str) and re.match( | |||
"^([cxg]pu|rocm|multithread)(\d+|\d+:\d+|x)$", inp | |||
): | |||
return True | |||
return False | |||
@@ -1153,35 +1153,39 @@ def dot(inp1: Tensor, inp2: Tensor) -> Tensor: | |||
def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: | |||
r"""Computes the singular value decompositions of input matrix. | |||
r"""Returns a singular value decomposition ``A = USVh`` of a matrix (or a stack of matrices) ``x`` , where ``U`` is a matrix (or a stack of matrices) with orthonormal columns, ``S`` is a vector of non-negative numbers (or stack of vectors), and ``Vh`` is a matrix (or a stack of matrices) with orthonormal rows. | |||
Args: | |||
inp: input matrix, must has shape `[..., M, N]`. | |||
x (Tensor): A input real tensor having the shape ``(..., M, N)`` with ``x.ndim >= 2`` . | |||
full_matrices (bool, optional): If ``False`` , ``U`` and ``Vh`` have the shapes ``(..., M, K)`` and ``(..., K, N)`` , respectively, where ``K = min(M, N)`` . If ``True`` , the shapes are ``(..., M, M)`` and ``(..., N, N)`` , respectively. Default: ``False`` . | |||
compute_uv (bool, optional): Whether or not to compute ``U`` and ``Vh`` in addition to ``S`` . Default: ``True`` . | |||
Note: | |||
* naive does not support ``full_matrices`` and ``compute_uv`` as ``True`` . | |||
Returns: | |||
output matrices, `(U, sigma, V)`. | |||
Returns a tuple ( ``U`` , ``S`` , ``Vh`` ), which are SVD factors ``U`` , ``S``, ``Vh`` of input matrix ``x``. ( ``U`` , ``Vh`` only returned when ``compute_uv`` is True). | |||
``U`` contains matrices orthonormal columns (i.e., the columns are left singular vectors). If ``full_matrices`` is ``True`` , the array must have shape ``(..., M, M)`` . If ``full_matrices`` is ``False`` , the array must have shape ``(..., M, K)`` , where ``K = min(M, N)`` . | |||
Examples: | |||
.. testcode:: | |||
import numpy as np | |||
from megengine import tensor | |||
import megengine.functional as F | |||
x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2,3)) | |||
_, y, _ = F.svd(x) | |||
print(y.numpy().round(decimals=3)) | |||
>>> import numpy as np | |||
>>> x = Tensor(np.random.randn(9, 6)) | |||
>>> y = Tensor(np.random.randn(2, 7, 8, 3)) | |||
Outputs: | |||
.. testoutput:: | |||
Reconstruction based on reduced SVD, 2D case: | |||
>>> U, S, Vh = F.svd(x, full_matrices=False) | |||
>>> print(U._tuple_shape, S._tuple_shape, Vh._tuple_shape) | |||
(9, 6) (6,) (6, 6) | |||
[7.348 1. ] | |||
Reconsturction based on reduced SVD, 4D case: | |||
>>> u, s, vh = F.svd(y, full_matrices=False) | |||
>>> print(u._tuple_shape, s._tuple_shape, vh._tuple_shape) | |||
(2, 7, 8, 3) (2, 7, 3) (2, 7, 3, 3) | |||
""" | |||
op = builtin.SVD(full_matrices=full_matrices, compute_uv=compute_uv) | |||
U, sigma, V = apply(op, inp) | |||
return U, sigma, V | |||
U, S, Vh = apply(op, inp) | |||
return U, S, Vh | |||
def _check_non_finite(inps: Iterable[Tensor], scale=1.0) -> Tensor: | |||
@@ -74,7 +74,7 @@ def calculate_gain( | |||
) -> float: | |||
r"""Returns a recommended gain value (see the table below) for the given nonlinearity | |||
function. | |||
================= ==================================================== | |||
nonlinearity gain | |||
================= ==================================================== | |||
@@ -126,6 +126,11 @@ def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||
r"""Calculates fan_in / fan_out value for given weight tensor. This function assumes | |||
input tensor is stored in ``NCHW`` format. | |||
Note: | |||
The group conv2d kernel shape in MegEngine is ``(G, O/G, I/G, K, K)``. This | |||
function calculates ``fan_out = O/G * K * K`` as default, but PyTorch uses | |||
``fan_out = O * K * K``. | |||
Args: | |||
tensor: weight tensor in ``NCHW`` format. | |||
""" | |||
@@ -141,6 +146,10 @@ def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||
fan_in = shape[1] | |||
fan_out = shape[0] | |||
else: | |||
if ndim >= 5: | |||
# ignore the groups dimension of group conv2d and group conv3d | |||
# FIXME: will be wrong for conv3d | |||
shape = shape[1:] | |||
num_input_fmaps = shape[1] | |||
num_output_fmaps = shape[0] | |||
receptive_field_size = 1 | |||
@@ -154,7 +163,7 @@ def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||
def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
r"""Calculates fan_in / fan_out value for given weight tensor, depending on given | |||
``mode``. | |||
See :func:`calculate_fan_in_and_fan_out` for details. | |||
Args: | |||
@@ -175,11 +184,11 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||
def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||
where | |||
.. math:: | |||
a = \text{gain} \times \sqrt{\frac{6}{\text{fan_in} + \text{fan_out}}} | |||
Also known as Glorot initialization. Detailed information can be retrieved from | |||
`Understanding the difficulty of training deep feedforward neural networks` - | |||
Glorot, X. & Bengio, Y. (2010). | |||
@@ -197,11 +206,11 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||
def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||
r"""Fills tensor with random values sampled from | |||
:math:`\mathcal{N}(0, \text{std}^2)` where | |||
.. math:: | |||
\text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}} | |||
Also known as Glorot initialization. Detailed information can be retrieved from | |||
`Understanding the difficulty of training deep feedforward neural networks` - | |||
Glorot, X. & Bengio, Y. (2010). | |||
@@ -220,11 +229,11 @@ def msra_uniform_( | |||
) -> None: | |||
r"""Fills tensor wilth random values sampled from | |||
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where | |||
.. math:: | |||
\text{bound} = \sqrt{\frac{6}{(1 + a^2) \times \text{fan_in}}} | |||
Detailed information can be retrieved from | |||
`Delving deep into rectifiers: Surpassing human-level performance on ImageNet | |||
classification` | |||
@@ -251,11 +260,11 @@ def msra_normal_( | |||
) -> None: | |||
r"""Fills tensor wilth random values sampled from | |||
:math:`\mathcal{N}(0, \text{std}^2)` where | |||
.. math:: | |||
\text{std} = \sqrt{\frac{2}{(1 + a^2) \times \text{fan_in}}} | |||
Detailed information can be retrieved from | |||
`Delving deep into rectifiers: Surpassing human-level performance on ImageNet | |||
classification` | |||
@@ -10,7 +10,7 @@ import numpy as np | |||
import pytest | |||
from megengine import tensor | |||
from megengine.module import Conv2d, Linear | |||
from megengine.module import Conv1d, Conv2d, Conv3d, Linear | |||
from megengine.module.init import calculate_fan_in_and_fan_out, fill_ | |||
@@ -32,7 +32,34 @@ def test_calculate_fan_in_and_fan_out(): | |||
with pytest.raises(ValueError): | |||
calculate_fan_in_and_fan_out(l.bias) | |||
l = Conv1d(in_channels=2, out_channels=3, kernel_size=5) | |||
fanin, fanout = calculate_fan_in_and_fan_out(l.weight) | |||
assert fanin == 2 * 5 | |||
assert fanout == 3 * 5 | |||
# FIXME: will be wrong for group conv1d | |||
# l = Conv1d(in_channels=2, out_channels=4, kernel_size=5, groups=2) | |||
# fanin, fanout = calculate_fan_in_and_fan_out(l.weight) | |||
# assert fanin == 2 // 2 * 5 | |||
# assert fanout == 4 // 2 * 5 | |||
l = Conv2d(in_channels=2, out_channels=3, kernel_size=(5, 7)) | |||
fanin, fanout = calculate_fan_in_and_fan_out(l.weight) | |||
assert fanin == 2 * 5 * 7 | |||
assert fanout == 3 * 5 * 7 | |||
l = Conv2d(in_channels=2, out_channels=4, kernel_size=(5, 7), groups=2) | |||
fanin, fanout = calculate_fan_in_and_fan_out(l.weight) | |||
assert fanin == 2 // 2 * 5 * 7 | |||
assert fanout == 4 // 2 * 5 * 7 | |||
# FIXME: will be wrong for conv3d | |||
# l = Conv3d(in_channels=2, out_channels=3, kernel_size=(5, 7, 9)) | |||
# fanin, fanout = calculate_fan_in_and_fan_out(l.weight) | |||
# assert fanin == 2 * 5 * 7 * 9 | |||
# assert fanout == 3 * 5 * 7 * 9 | |||
l = Conv3d(in_channels=2, out_channels=4, kernel_size=(5, 7, 9), groups=2) | |||
fanin, fanout = calculate_fan_in_and_fan_out(l.weight) | |||
assert fanin == 2 // 2 * 5 * 7 * 9 | |||
assert fanout == 4 // 2 * 5 * 7 * 9 |
@@ -154,6 +154,21 @@ LITE_API void set_tensor_rt_cache(std::string tensorrt_cache_path); | |||
*/ | |||
LITE_API void dump_tensor_rt_cache(); | |||
/** | |||
* register the physical and virtual address pair to the mge, some device | |||
* need the map from physical to virtual. | |||
*/ | |||
LITE_API bool register_memory_pair( | |||
void* vir_ptr, void* phy_ptr, size_t length, LiteDeviceType device, | |||
LiteBackend backend = LiteBackend::LITE_DEFAULT); | |||
/** | |||
* clear the physical and virtual address pair in mge. | |||
*/ | |||
LITE_API bool clear_memory_pair( | |||
void* vir_ptr, void* phy_ptr, LiteDeviceType device, | |||
LiteBackend backend = LiteBackend::LITE_DEFAULT); | |||
} // namespace lite | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -160,9 +160,24 @@ LITE_API int LITE_dump_persistent_cache(const char* cache_path); | |||
* \brief dump the tensorrt policy cache to file | |||
*/ | |||
LITE_API int LITE_dump_tensor_rt_cache(); | |||
#endif | |||
/** | |||
* register the physical and virtual address pair to the mge, some device | |||
* need the map from physical to virtual. | |||
*/ | |||
LITE_API int LITE_register_memory_pair( | |||
void* vir_ptr, void* phy_ptr, size_t length, LiteDeviceType device, | |||
LiteBackend backend); | |||
/** | |||
* clear the physical and virtual address pair in mge. | |||
*/ | |||
LITE_API int LITE_clear_memory_pair( | |||
void* phy_ptr, void* vir_ptr, LiteDeviceType device, LiteBackend backend); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -189,4 +189,19 @@ int LITE_dump_tensor_rt_cache() { | |||
LITE_CAPI_END(); | |||
} | |||
int LITE_register_memory_pair( | |||
void* vir_ptr, void* phy_ptr, size_t length, LiteDeviceType device, | |||
LiteBackend backend) { | |||
LITE_CAPI_BEGIN(); | |||
lite::register_memory_pair(vir_ptr, phy_ptr, length, device, backend); | |||
LITE_CAPI_END(); | |||
} | |||
int LITE_clear_memory_pair( | |||
void* phy_ptr, void* vir_ptr, LiteDeviceType device, LiteBackend backend) { | |||
LITE_CAPI_BEGIN(); | |||
lite::clear_memory_pair(vir_ptr, phy_ptr, device, backend); | |||
LITE_CAPI_END(); | |||
} | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -42,6 +42,8 @@ class _GlobalAPI(_LiteCObjBase): | |||
# ('LITE_set_tensor_rt_cache', [c_char_p]), | |||
("LITE_dump_persistent_cache", [c_char_p]), | |||
("LITE_dump_tensor_rt_cache", [c_char_p]), | |||
("LITE_register_memory_pair", [c_void_p, c_void_p, c_size_t, c_int, c_int]), | |||
("LITE_clear_memory_pair", [c_void_p, c_void_p, c_int, c_int]), | |||
] | |||
@@ -121,3 +123,21 @@ class LiteGlobal(object): | |||
@staticmethod | |||
def try_coalesce_all_free_memory(): | |||
LiteGlobal._api.LITE_try_coalesce_all_free_memory() | |||
@staticmethod | |||
def register_memory_pair( | |||
vir_ptr, phy_ptr, length, device, backend=LiteBackend.LITE_DEFAULT | |||
): | |||
assert isinstance(vir_ptr, c_void_p) and isinstance( | |||
phy_ptr, c_void_p | |||
), "clear memory pair only accept c_void_p type." | |||
LiteGlobal._api.LITE_register_memory_pair( | |||
vir_ptr, phy_ptr, length, device, backend | |||
) | |||
@staticmethod | |||
def clear_memory_pair(vir_ptr, phy_ptr, device, backend=LiteBackend.LITE_DEFAULT): | |||
assert isinstance(vir_ptr, c_void_p) and isinstance( | |||
phy_ptr, c_void_p | |||
), "clear memory pair only accept c_void_p type." | |||
LiteGlobal._api.LITE_clear_memory_pair(vir_ptr, phy_ptr, device, backend) |
@@ -212,6 +212,26 @@ void lite::dump_tensor_rt_cache() { | |||
#endif | |||
} | |||
bool lite::register_memory_pair( | |||
void* vir_ptr, void* phy_ptr, size_t length, LiteDeviceType device, | |||
LiteBackend backend) { | |||
LITE_MARK_USED_VAR(vir_ptr); | |||
LITE_MARK_USED_VAR(phy_ptr); | |||
LITE_MARK_USED_VAR(length); | |||
LITE_MARK_USED_VAR(device); | |||
LITE_MARK_USED_VAR(backend); | |||
LITE_THROW("register_memory_pair is not implement yet!"); | |||
} | |||
bool lite::clear_memory_pair( | |||
void* vir_ptr, void* phy_ptr, LiteDeviceType device, LiteBackend backend) { | |||
LITE_MARK_USED_VAR(vir_ptr); | |||
LITE_MARK_USED_VAR(phy_ptr); | |||
LITE_MARK_USED_VAR(device); | |||
LITE_MARK_USED_VAR(backend); | |||
LITE_THROW("clear_memory_pair is not implement yet!"); | |||
} | |||
#else // LITE_BUILD_WITH_MGE | |||
void lite::try_coalesce_all_free_memory() {} | |||
@@ -235,6 +255,17 @@ void lite::set_tensor_rt_cache(std::string) { | |||
void lite::dump_tensor_rt_cache() { | |||
LITE_THROW("mge is disbale at build time, please build with mge"); | |||
} | |||
bool lite::register_memory_pair( | |||
void* vir_ptr, void* phy_ptr, size_t length, LiteDeviceType device, | |||
LiteBackend beckend) { | |||
LITE_THROW("register_memory_pair is not implement yet!"); | |||
} | |||
bool lite::clear_memory_pair( | |||
void* vir_ptr, void* phy_ptr, LiteDeviceType device, LiteBackend beckend) { | |||
LITE_THROW("clear_memory_pair is not implement yet!"); | |||
} | |||
#endif | |||
namespace lite { | |||
REGIST_DECRYPTION_METHOD( | |||
@@ -1357,5 +1357,6 @@ TEST(TestNetWork, CambriconDeviceID) { | |||
load_device_id(LiteDeviceType::LITE_CAMBRICON, 0, "./model_magicmind.mgb"); | |||
} | |||
#endif | |||
#endif | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |