GitOrigin-RevId: 7c1993979c
tags/v1.9.0
@@ -285,7 +285,8 @@ struct TensorLayout : public TensorShape { | |||||
* stride | * stride | ||||
*/ | */ | ||||
void add_axis_cont_inplace(size_t axis) { | void add_axis_cont_inplace(size_t axis) { | ||||
add_axis_inplace(axis, 1, stride[axis] * shape[axis]); | |||||
ptrdiff_t stride_ = axis < ndim ? stride[axis] * shape[axis] : 1; | |||||
add_axis_inplace(axis, 1, stride_); | |||||
} | } | ||||
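The guard above matters when axis == ndim (appending a trailing axis): stride[axis] and shape[axis] then refer past the last initialized axis, so the contiguous stride must fall back to 1. A minimal standalone sketch of the guarded computation (toy types, not the MegDNN TensorLayout):

#include <cassert>
#include <cstddef>

// Toy layout mirroring only the fields the guard touches.
struct ToyLayout {
    static constexpr size_t MAX_NDIM = 7;
    size_t ndim = 0;
    size_t shape[MAX_NDIM] = {};
    ptrdiff_t stride[MAX_NDIM] = {};

    // Insert a size-1 axis at `axis` with the given stride.
    void add_axis_inplace(size_t axis, size_t sz, ptrdiff_t st) {
        assert(axis <= ndim && ndim < MAX_NDIM && sz == 1);
        for (size_t i = ndim; i > axis; --i) {
            shape[i] = shape[i - 1];
            stride[i] = stride[i - 1];
        }
        shape[axis] = sz;
        stride[axis] = st;
        ++ndim;
    }

    void add_axis_cont_inplace(size_t axis) {
        // axis == ndim: nothing to derive the stride from, fall back to 1
        ptrdiff_t st = axis < ndim ? stride[axis] * ptrdiff_t(shape[axis]) : 1;
        add_axis_inplace(axis, 1, st);
    }
};

int main() {
    ToyLayout l;
    l.ndim = 1;
    l.shape[0] = 4;
    l.stride[0] = 1;
    l.add_axis_cont_inplace(1);  // append a trailing axis: previously read uninitialized entries
    assert(l.ndim == 2 && l.stride[1] == 1);
}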
/*! | /*! | ||||
@@ -382,7 +382,7 @@ bool TensorLayout::eq_layout(const TensorLayout& rhs) const { | |||||
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); | MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); | ||||
auto ax = [](size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) { | auto ax = [](size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) { | ||||
return (shape0 == shape1) & ((shape0 == 1) | (stride0 == stride1)); | |||||
return (shape0 == shape1) & ((shape0 <= 1) | (stride0 == stride1)); | |||||
}; | }; | ||||
if (ndim == rhs.ndim) { | if (ndim == rhs.ndim) { | ||||
size_t eq = 0; | size_t eq = 0; | ||||
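The relaxed test (shape0 <= 1 instead of shape0 == 1) makes zero-extent axes stride-agnostic as well: no element is ever addressed through an axis of size 0 or 1, so differing strides there should not break layout equality. A small self-contained check of the same predicate:

#include <cassert>
#include <cstddef>

static bool axis_eq(size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) {
    return (shape0 == shape1) & ((shape0 <= 1) | (stride0 == stride1));
}

int main() {
    assert(axis_eq(0, 0, 4, 1));   // size-0 axis: strides may differ
    assert(axis_eq(1, 1, 4, 1));   // size-1 axis: already accepted before the change
    assert(!axis_eq(2, 2, 4, 1));  // real axis: strides must still match
}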
@@ -13,7 +13,8 @@ | |||||
using namespace megdnn; | using namespace megdnn; | ||||
const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle(int debug_level) { | |||||
MGE_WIN_DECLSPEC_FUC const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle( | |||||
int debug_level) { | |||||
auto make = [](int deb_level) { | auto make = [](int deb_level) { | ||||
megcoreDeviceHandle_t dev_handle; | megcoreDeviceHandle_t dev_handle; | ||||
megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU); | megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU); | ||||
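The only functional change in this hunk is exporting inplace_cpu_handle from the Windows DLL via MGE_WIN_DECLSPEC_FUC. The macro's definition is not part of this diff; a typical export macro of this kind looks roughly like the following (assumption, for orientation only):

// Hypothetical expansion; the real MGE_WIN_DECLSPEC_FUC is defined elsewhere in MegEngine.
#if defined(_WIN32)
#define MGE_WIN_DECLSPEC_FUC __declspec(dllexport)
#else
#define MGE_WIN_DECLSPEC_FUC
#endif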
@@ -32,6 +32,7 @@ | |||||
#include "./module_trace.h" | #include "./module_trace.h" | ||||
#include "./numpy_dtypes.h" | #include "./numpy_dtypes.h" | ||||
#include "./tensor.h" | #include "./tensor.h" | ||||
#include "./tensor_utils.h" | |||||
#include "./transformation.h" | #include "./transformation.h" | ||||
#include <object.h> | #include <object.h> | ||||
@@ -549,557 +550,6 @@ CompNode _get_device(PyObject* const* args, size_t nargs) { | |||||
return cn; | return cn; | ||||
} | } | ||||
bool is_scalar(PyObject* tensor) { | |||||
if (py::isinstance<PySymbolVar>(py::handle(tensor))) { | |||||
auto var = py::handle(tensor).cast<PySymbolVar*>(); | |||||
return var->is_scalar; | |||||
} | |||||
auto* tw = TensorWrapper::try_cast(tensor); | |||||
if (tw) { | |||||
return tw->m_tensor->is_scalar(); | |||||
} | |||||
return PyArray_CheckAnyScalar(tensor); | |||||
} | |||||
bool is_bool_list(PyObject* arg) { | |||||
if (!PyList_Check(arg)) { | |||||
return false; | |||||
} | |||||
size_t sz = PyList_Size(arg); | |||||
if (!sz) { | |||||
return false; | |||||
} | |||||
for (size_t i = 0; i < sz; ++i) { | |||||
PyObject* handle = PyList_GetItem(arg, i); | |||||
if (!PyBool_Check(handle)) { | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
bool is_bool_dtype(PyObject* args) { | |||||
if (!PyObject_HasAttrString(args, "dtype")) | |||||
return false; | |||||
PyObject* dobj = PyObject_GetAttrString(args, "dtype"); | |||||
PyArray_Descr* dtype; | |||||
PyArray_DescrConverter(dobj, &dtype); | |||||
bool ret = (dtype->kind == 'b'); | |||||
Py_XDECREF(dtype); | |||||
Py_XDECREF(dobj); | |||||
return ret; | |||||
} | |||||
py::object _Const( | |||||
py::handle value, py::handle dtype, py::handle device, py::handle ref) { | |||||
py::object val = py::reinterpret_borrow<py::object>(value); | |||||
if (PyArray_Check(value.ptr())) { | |||||
py::tuple strides = | |||||
py::reinterpret_borrow<py::tuple>(getattr(value, "strides")); | |||||
bool need_squeeze = false; | |||||
for (size_t i = 0; i < strides.size(); ++i) { | |||||
if (strides[i].cast<ptrdiff_t>() == 0) { | |||||
need_squeeze = true; | |||||
} | |||||
} | |||||
if (need_squeeze) { | |||||
val = py::reinterpret_borrow<py::array>(value); | |||||
val = val.attr("squeeze")(); | |||||
val = val.attr("reshape")(val.attr("shape")); | |||||
} | |||||
} | |||||
if (py::isinstance<PySymbolVar>(ref)) { | |||||
auto ref_var = ref.cast<PySymbolVar*>(); | |||||
auto* graph = ref_var->m_node->owner_graph(); | |||||
auto cn = device.cast<CompNode>(); | |||||
OperatorNodeConfig config(cn); | |||||
auto hv = npy::np2tensor( | |||||
val.ptr(), npy::Meth::borrow(cn), dtype.cast<mgb::DType>()); | |||||
auto typeobj = ref.get_type(); | |||||
return typeobj(opr::ImmutableTensor::make(*graph, hv, config).node()); | |||||
} | |||||
py::tuple tup = py::make_tuple(val, dtype, device, true, false, py::none()); | |||||
return TensorWrapper::make(py_tensor_type, tup.ptr(), nullptr); | |||||
} | |||||
py::tuple _make_shape_tuple(py::handle shape) { | |||||
py::list orig; | |||||
py::list ret(0); | |||||
auto solve_one = [&](py::handle val) { | |||||
if (TensorWrapper::try_cast(val.ptr()) || py::isinstance<PySymbolVar>(val)) { | |||||
py::object np = getattr(val, "numpy")(); | |||||
PyArrayObject* arr = (PyArrayObject*)np.ptr(); | |||||
PyObject* maybe_list = PyArray_ToList(arr); | |||||
if (PyList_Check(maybe_list)) { | |||||
py::list may = py::reinterpret_steal<py::list>(maybe_list); | |||||
for (size_t i = 0; i < may.size(); ++i) { | |||||
ret.append(may[i]); | |||||
} | |||||
} else { | |||||
mgb_assert(PyLong_Check(maybe_list)); | |||||
ret.append(PyLong_AsLong(maybe_list)); | |||||
Py_XDECREF(maybe_list); | |||||
} | |||||
} else if (PyArray_Check(val.ptr())) { | |||||
ret.append(PyArray_PyIntAsInt(val.ptr())); | |||||
} else { | |||||
ret.append(PyLong_AsLong(val.ptr())); | |||||
} | |||||
}; | |||||
if (PyArray_Check(shape.ptr()) && !PyArray_CheckAnyScalar(shape.ptr())) { | |||||
orig = py::reinterpret_steal<py::list>( | |||||
PyArray_ToList((PyArrayObject*)shape.ptr())); | |||||
for (size_t i = 0; i < orig.size(); ++i) { | |||||
solve_one(orig[i]); | |||||
} | |||||
} else if (PyList_Check(shape.ptr())) { | |||||
orig = py::reinterpret_borrow<py::list>(shape); | |||||
for (size_t i = 0; i < orig.size(); ++i) { | |||||
solve_one(orig[i]); | |||||
} | |||||
} else if (PyTuple_Check(shape.ptr())) { | |||||
py::tuple tup = py::reinterpret_borrow<py::tuple>(shape); | |||||
for (size_t i = 0; i < tup.size(); ++i) { | |||||
solve_one(tup[i]); | |||||
} | |||||
} else { | |||||
solve_one(shape); | |||||
} | |||||
return py::reinterpret_steal<py::tuple>(PyList_AsTuple(ret.ptr())); | |||||
} | |||||
py::object _get_index(py::object tensor, py::object src) { | |||||
if (!TensorWrapper::try_cast(tensor.ptr()) && | |||||
!py::isinstance<PySymbolVar>(tensor)) { | |||||
auto get_const = [&](mgb::DType dtype) -> py::object { | |||||
return _Const(tensor, py::cast(dtype), src.attr("device"), src); | |||||
}; | |||||
if (is_bool_list(tensor.ptr()) || is_bool_dtype(tensor.ptr())) { | |||||
tensor = get_const(dtype::Bool()); | |||||
} else { | |||||
tensor = get_const(dtype::Int32()); | |||||
} | |||||
if (!is_bool_dtype(tensor.ptr())) { | |||||
return tensor; | |||||
} | |||||
} else { | |||||
if (!is_bool_dtype(tensor.ptr())) { | |||||
return tensor; | |||||
} | |||||
} | |||||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
std::vector<PyObject*> p; | |||||
p.resize(3); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
p[2] = tensor.ptr(); | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
return ret[1]; | |||||
} | |||||
py::tuple _try_cond_take(py::handle tensor, py::handle index) { | |||||
if (!hasattr(index, "dtype") || !hasattr(index, "shape")) { | |||||
return py::tuple(); | |||||
} | |||||
if (!is_bool_dtype(index.ptr()) || | |||||
_make_shape_tuple(getattr(index, "shape")) | |||||
.not_equal(_make_shape_tuple(getattr(tensor, "shape")))) { | |||||
return py::tuple(); | |||||
} | |||||
py::object iobj; | |||||
if (PyArray_Check(index.ptr())) { | |||||
iobj = | |||||
_Const(index, py::cast((mgb::DType)dtype::Bool()), | |||||
getattr(tensor, "device"), tensor); | |||||
} else { | |||||
iobj = py::reinterpret_borrow<py::object>(index); | |||||
} | |||||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
std::vector<PyObject*> p; | |||||
p.resize(3); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
p[2] = iobj.ptr(); | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
return ret; | |||||
} | |||||
py::tuple _remove_ellipsis(py::object tensor, py::tuple tuple_val) { | |||||
size_t tuple_size = tuple_val.size(); | |||||
size_t ndim_sum = 0, cur_sum = 0; | |||||
int pos = -1; | |||||
bool has_unknown_ndim_bool_index = false; | |||||
for (size_t i = 0; i < tuple_size; ++i) { | |||||
py::object handle = tuple_val[i]; | |||||
if (handle.ptr() == Py_Ellipsis) { | |||||
pos = static_cast<int>(i); | |||||
for (size_t j = 0; j < i; ++j) { | |||||
py::object t = tuple_val[j]; | |||||
if (t.ptr() == Py_Ellipsis) { | |||||
throw py::index_error("only one ellipsis is allowed."); | |||||
} | |||||
} | |||||
} else { | |||||
size_t ndim_incr = 1; | |||||
if (hasattr(handle, "dtype") && is_bool_dtype(handle.ptr()) && | |||||
hasattr(handle, "ndim")) { | |||||
py::object ndim = getattr(handle, "ndim"); | |||||
if (PyLong_Check(ndim.ptr())) { | |||||
ndim_incr = PyLong_AsLong(ndim.ptr()); | |||||
} else { | |||||
has_unknown_ndim_bool_index = true; | |||||
} | |||||
} | |||||
cur_sum += ndim_incr; | |||||
} | |||||
} | |||||
if (pos == -1) { | |||||
return tuple_val; | |||||
} else { | |||||
if (has_unknown_ndim_bool_index) { | |||||
throw py::index_error( | |||||
"does not support bool index with unknown shape when using " | |||||
"Ellipsis."); | |||||
} | |||||
try { | |||||
ndim_sum = getattr(tensor, "ndim").cast<size_t>(); | |||||
} catch (py::error_already_set& err) { | |||||
throw py::index_error( | |||||
"does not support Ellipsis when tensor's ndim is unknown."); | |||||
} | |||||
py::tuple ret(ndim_sum - cur_sum + tuple_size - 1); | |||||
size_t idx = 0; | |||||
for (size_t i = 0; i < tuple_size; ++i) { | |||||
if (i == pos) { | |||||
for (size_t j = cur_sum; j < ndim_sum; ++j) { | |||||
ret[idx++] = PySlice_New(NULL, NULL, NULL); | |||||
} | |||||
} else { | |||||
ret[idx++] = tuple_val[i]; | |||||
} | |||||
} | |||||
return ret; | |||||
} | |||||
} | |||||
py::tuple _expand_bool_dim(py::object tensor, py::tuple tuple_val) { | |||||
py::tuple cur_shape = _make_shape_tuple(py::handle(getattr(tensor, "shape"))); | |||||
py::list new_tuple_val(0); | |||||
size_t offset = 0; | |||||
size_t tdim = 0; | |||||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
py::handle k = tuple_val[i]; | |||||
if (is_bool_dtype(k.ptr())) { | |||||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
if (ndim > 1) { | |||||
py::tuple ishape = _make_shape_tuple(py::handle(getattr(k, "shape"))); | |||||
for (size_t j = 0; j < ndim; ++j) { | |||||
if (cur_shape[tdim + j - offset].cast<size_t>() != | |||||
ishape[j].cast<size_t>()) { | |||||
std::string msg = | |||||
"boolean index did not match tensor along dimension " + | |||||
std::to_string(tdim + j) + "; dimension is " + | |||||
std::to_string( | |||||
cur_shape[tdim + j - offset].cast<size_t>()) + | |||||
" but corresponding boolean dimension is " + | |||||
std::to_string(ishape[j].cast<size_t>()); | |||||
throw py::index_error(msg.c_str()); | |||||
} | |||||
} | |||||
py::object new_k = getattr(k, "reshape")(-1); | |||||
py::object kshape = getattr(new_k, "shape"); | |||||
py::list new_shape(0); | |||||
PyObject* sym = PyObject_CallObject(cpp_use_symbolic_shape, nullptr); | |||||
bool is_sym = (sym == Py_True); | |||||
Py_XDECREF(sym); | |||||
if (is_sym) { | |||||
py::object tshape = getattr(tensor, "shape"); | |||||
for (size_t j = 0; j < i; ++j) { | |||||
new_shape.append(tshape[py::int_(j)]); | |||||
} | |||||
new_shape.append(kshape[py::int_(0)]); | |||||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
new_shape.append(cur_shape[j]); | |||||
} | |||||
py::tuple args = py::make_tuple(new_shape); | |||||
PyObject* shape_tensor = | |||||
PyObject_CallObject(cpp_astensor1d, args.ptr()); | |||||
py::object reshape_func = getattr(tensor, "reshape"); | |||||
Py_INCREF(shape_tensor); | |||||
PyObject* Args = PyTuple_New(1); | |||||
PyTuple_SetItem(Args, 0, shape_tensor); | |||||
PyObject* new_tensor = | |||||
PyObject_CallObject(reshape_func.ptr(), Args); | |||||
Py_XDECREF(Args); | |||||
tensor = py::reinterpret_steal<py::object>(new_tensor); | |||||
cur_shape = _make_shape_tuple(py::handle(shape_tensor)); | |||||
Py_XDECREF(shape_tensor); | |||||
} else { | |||||
for (size_t j = 0; j < i; ++j) { | |||||
new_shape.append(cur_shape[j]); | |||||
} | |||||
new_shape.append(py::reinterpret_borrow<py::tuple>(kshape)[0]); | |||||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
new_shape.append(cur_shape[j]); | |||||
} | |||||
cur_shape = new_shape; | |||||
tensor = getattr(tensor, "reshape")(cur_shape); | |||||
} | |||||
offset++; | |||||
tdim += ndim; | |||||
} | |||||
new_tuple_val.append(k); | |||||
} else { | |||||
new_tuple_val.append(k); | |||||
tdim++; | |||||
} | |||||
} | |||||
return py::make_tuple(tensor, py::reinterpret_borrow<py::tuple>(new_tuple_val)); | |||||
} | |||||
py::tuple _unpack_indexes(py::handle inp_hdl, py::handle idx_hdl) { | |||||
py::object inp = py::reinterpret_borrow<py::object>(inp_hdl); | |||||
py::tuple tuple_val; | |||||
if (py::isinstance<py::tuple>(idx_hdl)) { | |||||
tuple_val = py::reinterpret_borrow<py::tuple>(idx_hdl); | |||||
} else { | |||||
tuple_val = py::make_tuple(idx_hdl); | |||||
} | |||||
bool use_subtensor = true; | |||||
bool need_remove_ellipsis = false; | |||||
bool need_expand_bool_dim = false; | |||||
size_t idx_ndim = 0; | |||||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
py::object k = tuple_val[i]; | |||||
if (k.ptr() == Py_None) { | |||||
throw py::index_error("newaxis is not allowed here"); | |||||
} else if (k.ptr() == Py_Ellipsis) { | |||||
need_remove_ellipsis = true; | |||||
} else { | |||||
if (is_bool_dtype(k.ptr()) && hasattr(k, "ndim")) { | |||||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
idx_ndim += ndim; | |||||
if (ndim > 1) { | |||||
need_expand_bool_dim = true; | |||||
} | |||||
} else { | |||||
idx_ndim++; | |||||
} | |||||
} | |||||
} | |||||
try { | |||||
size_t inp_ndim = getattr(inp, "ndim").cast<size_t>(); | |||||
if (idx_ndim > inp_ndim) { | |||||
std::string msg = "too many indices for tensor: tensor is " + | |||||
std::to_string(inp_ndim) + "-dimensional, but " + | |||||
std::to_string(idx_ndim) + " were indexed"; | |||||
throw py::index_error(msg.c_str()); | |||||
} | |||||
} catch (py::error_already_set& err) { | |||||
; // ignore | |||||
} | |||||
if (need_remove_ellipsis) { | |||||
tuple_val = _remove_ellipsis(inp, tuple_val); | |||||
} | |||||
if (need_expand_bool_dim) { | |||||
py::object shape = getattr(inp, "shape"); | |||||
if (shape.ptr() != Py_None) { | |||||
py::tuple ret = _expand_bool_dim(inp, tuple_val); | |||||
inp = ret[0]; | |||||
tuple_val = ret[1]; | |||||
} | |||||
} | |||||
py::list items; | |||||
py::list tensors; | |||||
int cur_axis = -1; | |||||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
py::object handle = tuple_val[i]; | |||||
cur_axis++; | |||||
if (!is_scalar(handle.ptr()) && !PySlice_Check(handle.ptr())) { | |||||
use_subtensor = false; | |||||
} | |||||
py::list item; | |||||
item.append(cur_axis); | |||||
auto push = [&](PyObject* v) { | |||||
if (v == Py_None) { | |||||
item.append(false); | |||||
} else { | |||||
item.append(true); | |||||
tensors.append(_get_index(py::reinterpret_borrow<py::object>(v), inp)); | |||||
} | |||||
}; | |||||
if (PySlice_Check(handle.ptr())) { | |||||
PySliceObject* s = (PySliceObject*)handle.ptr(); | |||||
if (s->start == Py_None && s->stop == Py_None && s->step == Py_None) { | |||||
continue; | |||||
} | |||||
push(s->start); | |||||
push(s->stop); | |||||
push(s->step); | |||||
item.append(false); | |||||
} else { | |||||
for (size_t j = 0; j < 3; j++) | |||||
item.append(false); | |||||
push(handle.ptr()); | |||||
} | |||||
items.append(item); | |||||
} | |||||
return py::make_tuple(inp, tensors, items, use_subtensor, need_expand_bool_dim); | |||||
} | |||||
py::object _getitem_cpp(py::handle inp_hdl, py::handle idx_hdl) { | |||||
py::tuple try_res = _try_cond_take(inp_hdl, idx_hdl); | |||||
if (try_res.size() == 2) { | |||||
return try_res[0]; | |||||
} | |||||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
for (size_t i = 0; i < py_items.size(); ++i) { | |||||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
cpp_items.push_back( | |||||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
} | |||||
static std::shared_ptr<OpDef> op; | |||||
if (up[3].cast<bool>()) { | |||||
op = Subtensor::make(cpp_items); | |||||
} else { | |||||
op = IndexingMultiAxisVec::make(cpp_items); | |||||
} | |||||
std::vector<PyObject*> p; | |||||
p.resize(tensors.size() + 2); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
for (size_t i = 0; i < tensors.size(); ++i) { | |||||
p[i + 2] = tensors[i].ptr(); | |||||
} | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
return ret[0]; | |||||
} | |||||
py::object _setitem_cpp(py::handle inp_hdl, py::handle idx_hdl, py::handle val_hdl) { | |||||
py::object org_shape = getattr(inp_hdl, "shape"); | |||||
py::object val = py::reinterpret_borrow<py::object>(val_hdl); | |||||
if (!TensorWrapper::try_cast(val.ptr()) && !py::isinstance<PySymbolVar>(val)) { | |||||
val = | |||||
_Const(val_hdl, getattr(inp_hdl, "dtype"), getattr(inp_hdl, "device"), | |||||
inp_hdl); | |||||
} | |||||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
for (size_t i = 0; i < py_items.size(); ++i) { | |||||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
cpp_items.push_back( | |||||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
} | |||||
static std::shared_ptr<OpDef> op, set_op; | |||||
if (up[3].cast<bool>()) { | |||||
op = Subtensor::make(cpp_items); | |||||
} else { | |||||
op = IndexingMultiAxisVec::make(cpp_items); | |||||
} | |||||
std::vector<PyObject*> p; | |||||
p.resize(tensors.size() + 2); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
for (size_t i = 0; i < tensors.size(); ++i) { | |||||
p[i + 2] = tensors[i].ptr(); | |||||
} | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
py::object tmp_result = ret[0]; | |||||
try { | |||||
py::object value_tuple_shape = val.attr("_tuple_shape"); | |||||
py::object tmp_result_tuple_shape = tmp_result.attr("_tuple_shape"); | |||||
py::tuple value_shape = py::reinterpret_borrow<py::tuple>(value_tuple_shape); | |||||
py::tuple tmp_result_shape = | |||||
py::reinterpret_borrow<py::tuple>(tmp_result_tuple_shape); | |||||
for (size_t i = 0; i < value_shape.size() && i < tmp_result_shape.size(); ++i) { | |||||
size_t vs = value_shape[value_shape.size() - i - 1].cast<size_t>(); | |||||
size_t ts = | |||||
tmp_result_shape[tmp_result_shape.size() - i - 1].cast<size_t>(); | |||||
if (vs != 1 && vs != ts) { | |||||
std::string lhs = "", rhs = ""; | |||||
for (size_t j = 0; j < tmp_result_shape.size(); ++j) { | |||||
lhs += std::to_string(tmp_result_shape[j].cast<size_t>()); | |||||
if (j) | |||||
lhs += ","; | |||||
} | |||||
for (size_t j = 0; j < value_shape.size(); ++j) { | |||||
rhs += std::to_string(value_shape[j].cast<size_t>()); | |||||
if (j) | |||||
rhs += ","; | |||||
} | |||||
throw py::value_error( | |||||
"cannot copy tensor with shape (" + rhs + | |||||
") to subtensor with shape (" + lhs + ")"); | |||||
} | |||||
} | |||||
} catch (py::error_already_set& err) { | |||||
; | |||||
} | |||||
py::object broadcast_func = getattr(val, "_broadcast"); | |||||
PyObject* Args = PyTuple_New(1); | |||||
PyTuple_SetItem(Args, 0, getattr(tmp_result, "shape").release().ptr()); | |||||
PyObject* new_val = PyObject_CallObject(broadcast_func.ptr(), Args); | |||||
Py_XDECREF(Args); | |||||
val = py::reinterpret_steal<py::object>(new_val); | |||||
if (up[3].cast<bool>()) { | |||||
set_op = SetSubtensor::make(cpp_items); | |||||
} else { | |||||
set_op = IndexingSetMultiAxisVec::make(cpp_items); | |||||
} | |||||
std::vector<PyObject*> q; | |||||
q.resize(tensors.size() + 3); | |||||
py::object Set_Op = py::cast(set_op); | |||||
q[0] = Set_Op.ptr(); | |||||
q[1] = tensor.ptr(); | |||||
q[2] = val.ptr(); | |||||
for (size_t i = 0; i < tensors.size(); ++i) { | |||||
q[i + 3] = tensors[i].ptr(); | |||||
} | |||||
py::tuple result = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, q.data(), q.size())); | |||||
py::object res = result[0]; | |||||
if (up[4].cast<bool>()) { | |||||
py::object reshape_func = getattr(res, "reshape"); | |||||
PyObject* Args = PyTuple_New(1); | |||||
PyTuple_SetItem(Args, 0, org_shape.release().ptr()); | |||||
PyObject* new_tensor = PyObject_CallObject(reshape_func.ptr(), Args); | |||||
Py_XDECREF(Args); | |||||
res = py::reinterpret_steal<py::object>(new_tensor); | |||||
} | |||||
return res; | |||||
} | |||||
// Returns the dtype that would result from performing an arithmetic | // Returns the dtype that would result from performing an arithmetic | ||||
// operation on the provided input tensors and scalars. | // operation on the provided input tensors and scalars. | ||||
PyObject* dtype_promotion(PyObject* self, PyObject* const* args, size_t nargs) { | PyObject* dtype_promotion(PyObject* self, PyObject* const* args, size_t nargs) { | ||||
@@ -1126,30 +576,6 @@ PyObject* get_device(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | PYEXT17_TRANSLATE_EXC_RET(nullptr) | ||||
} | } | ||||
PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
try { | |||||
return _make_shape_tuple(py::handle(args[0])).release().ptr(); | |||||
} | |||||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
} | |||||
PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
try { | |||||
return _getitem_cpp(py::handle(args[0]), py::handle(args[1])).release().ptr(); | |||||
} | |||||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
} | |||||
PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
try { | |||||
return _setitem_cpp( | |||||
py::handle(args[0]), py::handle(args[1]), py::handle(args[2])) | |||||
.release() | |||||
.ptr(); | |||||
} | |||||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
} | |||||
#ifdef METH_FASTCALL | #ifdef METH_FASTCALL | ||||
#define MGE_PY_INTERFACE(NAME, FUNC) \ | #define MGE_PY_INTERFACE(NAME, FUNC) \ | ||||
{ #NAME, (PyCFunction)FUNC, METH_FASTCALL, nullptr } | { #NAME, (PyCFunction)FUNC, METH_FASTCALL, nullptr } | ||||
@@ -38,6 +38,8 @@ namespace mgb::imperative::python { | |||||
extern interpreter::Interpreter::Channel* interpreter_for_py; | extern interpreter::Interpreter::Channel* interpreter_for_py; | ||||
extern PyTypeObject* py_tensor_type; | extern PyTypeObject* py_tensor_type; | ||||
extern PyObject* cpp_use_symbolic_shape; | |||||
extern PyObject* cpp_astensor1d; | |||||
struct Tensor : NonCopyableObj { | struct Tensor : NonCopyableObj { | ||||
private: | private: | ||||
@@ -0,0 +1,630 @@ | |||||
/** | |||||
* \file imperative/python/src/tensor_utils.cpp | |||||

* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
*/ | |||||
#include "megbrain/common.h" | |||||
#include "megbrain/dtype.h" | |||||
#include "megbrain/imperative/ops/autogen.h" | |||||
#include "megbrain/imperative/ops/backward_graph.h" | |||||
#include "megbrain/imperative/ops/utility.h" | |||||
#include "megbrain/imperative/profiler.h" | |||||
#include "megbrain/imperative/transformations/eval.h" | |||||
#include "megbrain/imperative/transformations/lazy.h" | |||||
#include "megbrain/imperative/transformations/scalar.h" | |||||
#include "megbrain/imperative/transformations/symbol.h" | |||||
#include "megbrain/imperative/transformations/trace.h" | |||||
#include "megbrain/imperative/utils/map.h" | |||||
#include "megbrain/imperative/utils/stats.h" | |||||
#include "megbrain/opr/io.h" | |||||
#include "megbrain/plugin/profiler.h" | |||||
#include "./common.h" | |||||
#include "./grad.h" | |||||
#include "./graph_rt.h" | |||||
#include "./helper.h" | |||||
#include "./module_trace.h" | |||||
#include "./numpy_dtypes.h" | |||||
#include "./tensor.h" | |||||
#include "./tensor_utils.h" | |||||
#include "./transformation.h" | |||||
#include <object.h> | |||||
#include <pybind11/numpy.h> | |||||
#include <pybind11/operators.h> | |||||
#include <pybind11/pytypes.h> | |||||
#include <pyerrors.h> | |||||
#include <range/v3/all.hpp> | |||||
#include <string> | |||||
#include <unordered_map> | |||||
#include "../../src/impl/mgb_cg_impl.h" | |||||
namespace py = pybind11; | |||||
namespace views = ranges::views; | |||||
namespace mgb::imperative::python { | |||||
bool is_scalar(PyObject* tensor) { | |||||
if (py::isinstance<PySymbolVar>(py::handle(tensor))) { | |||||
auto var = py::handle(tensor).cast<PySymbolVar*>(); | |||||
return var->is_scalar; | |||||
} | |||||
auto* tw = TensorWrapper::try_cast(tensor); | |||||
if (tw) { | |||||
return tw->m_tensor->is_scalar(); | |||||
} | |||||
return PyArray_CheckAnyScalar(tensor); | |||||
} | |||||
bool is_bool_list(PyObject* arg) { | |||||
if (!PyList_Check(arg)) { | |||||
return false; | |||||
} | |||||
size_t sz = PyList_Size(arg); | |||||
if (!sz) { | |||||
return false; | |||||
} | |||||
for (size_t i = 0; i < sz; ++i) { | |||||
PyObject* handle = PyList_GetItem(arg, i); | |||||
if (!PyBool_Check(handle)) { | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
bool is_bool_dtype(PyObject* args) { | |||||
if (!PyObject_HasAttrString(args, "dtype")) | |||||
return false; | |||||
PyObject* dobj = PyObject_GetAttrString(args, "dtype"); | |||||
PyArray_Descr* dtype; | |||||
PyArray_DescrConverter(dobj, &dtype); | |||||
bool ret = (dtype->kind == 'b'); | |||||
Py_XDECREF(dtype); | |||||
Py_XDECREF(dobj); | |||||
return ret; | |||||
} | |||||
py::object _Const( | |||||
py::handle value, py::handle dtype, py::handle device, py::handle ref) { | |||||
py::object val = py::reinterpret_borrow<py::object>(value); | |||||
if (PyArray_Check(value.ptr())) { | |||||
py::tuple strides = | |||||
py::reinterpret_borrow<py::tuple>(getattr(value, "strides")); | |||||
bool need_squeeze = false; | |||||
for (size_t i = 0; i < strides.size(); ++i) { | |||||
if (strides[i].cast<ptrdiff_t>() == 0) { | |||||
need_squeeze = true; | |||||
} | |||||
} | |||||
if (need_squeeze) { | |||||
val = py::reinterpret_borrow<py::array>(value); | |||||
val = val.attr("squeeze")(); | |||||
val = val.attr("reshape")(val.attr("shape")); | |||||
} | |||||
} | |||||
if (py::isinstance<PySymbolVar>(ref)) { | |||||
auto ref_var = ref.cast<PySymbolVar*>(); | |||||
auto* graph = ref_var->m_node->owner_graph(); | |||||
auto cn = device.cast<CompNode>(); | |||||
OperatorNodeConfig config(cn); | |||||
auto hv = npy::np2tensor( | |||||
val.ptr(), npy::Meth::borrow(cn), dtype.cast<mgb::DType>()); | |||||
auto typeobj = ref.get_type(); | |||||
return typeobj(opr::ImmutableTensor::make(*graph, hv, config).node()); | |||||
} | |||||
py::tuple tup = py::make_tuple(val, dtype, device, true, false, py::none()); | |||||
return TensorWrapper::make(py_tensor_type, tup.ptr(), nullptr); | |||||
} | |||||
py::tuple _make_shape_tuple(py::handle shape) { | |||||
py::list orig; | |||||
py::list ret(0); | |||||
auto solve_one = [&](py::handle val) { | |||||
if (TensorWrapper::try_cast(val.ptr()) || py::isinstance<PySymbolVar>(val)) { | |||||
py::object np = getattr(val, "numpy")(); | |||||
PyArrayObject* arr = (PyArrayObject*)np.ptr(); | |||||
PyObject* maybe_list = PyArray_ToList(arr); | |||||
if (PyList_Check(maybe_list)) { | |||||
py::list may = py::reinterpret_steal<py::list>(maybe_list); | |||||
for (size_t i = 0; i < may.size(); ++i) { | |||||
ret.append(may[i]); | |||||
} | |||||
} else { | |||||
mgb_assert(PyLong_Check(maybe_list)); | |||||
ret.append(PyLong_AsLong(maybe_list)); | |||||
Py_XDECREF(maybe_list); | |||||
} | |||||
} else if (PyArray_Check(val.ptr())) { | |||||
ret.append(PyArray_PyIntAsInt(val.ptr())); | |||||
} else { | |||||
ret.append(PyLong_AsLong(val.ptr())); | |||||
} | |||||
}; | |||||
if (PyArray_Check(shape.ptr()) && !PyArray_CheckAnyScalar(shape.ptr())) { | |||||
orig = py::reinterpret_steal<py::list>( | |||||
PyArray_ToList((PyArrayObject*)shape.ptr())); | |||||
for (size_t i = 0; i < orig.size(); ++i) { | |||||
solve_one(orig[i]); | |||||
} | |||||
} else if (PyList_Check(shape.ptr())) { | |||||
orig = py::reinterpret_borrow<py::list>(shape); | |||||
for (size_t i = 0; i < orig.size(); ++i) { | |||||
solve_one(orig[i]); | |||||
} | |||||
} else if (PyTuple_Check(shape.ptr())) { | |||||
py::tuple tup = py::reinterpret_borrow<py::tuple>(shape); | |||||
for (size_t i = 0; i < tup.size(); ++i) { | |||||
solve_one(tup[i]); | |||||
} | |||||
} else { | |||||
solve_one(shape); | |||||
} | |||||
return py::reinterpret_steal<py::tuple>(PyList_AsTuple(ret.ptr())); | |||||
} | |||||
py::object _get_index(py::object tensor, py::object src) { | |||||
if (!TensorWrapper::try_cast(tensor.ptr()) && | |||||
!py::isinstance<PySymbolVar>(tensor)) { | |||||
auto get_const = [&](mgb::DType dtype) -> py::object { | |||||
return _Const(tensor, py::cast(dtype), src.attr("device"), src); | |||||
}; | |||||
if (is_bool_list(tensor.ptr()) || is_bool_dtype(tensor.ptr())) { | |||||
tensor = get_const(dtype::Bool()); | |||||
} else { | |||||
tensor = get_const(dtype::Int32()); | |||||
} | |||||
if (!is_bool_dtype(tensor.ptr())) { | |||||
return tensor; | |||||
} | |||||
} else { | |||||
if (!is_bool_dtype(tensor.ptr())) { | |||||
return tensor; | |||||
} | |||||
} | |||||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
std::vector<PyObject*> p; | |||||
p.resize(3); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
p[2] = tensor.ptr(); | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
return ret[1]; | |||||
} | |||||
py::tuple _try_cond_take(py::handle tensor, py::handle index) { | |||||
if (!hasattr(index, "dtype") || !hasattr(index, "shape")) { | |||||
return py::tuple(); | |||||
} | |||||
if (!is_bool_dtype(index.ptr()) || | |||||
_make_shape_tuple(getattr(index, "shape")) | |||||
.not_equal(_make_shape_tuple(getattr(tensor, "shape")))) { | |||||
return py::tuple(); | |||||
} | |||||
py::object iobj; | |||||
if (PyArray_Check(index.ptr())) { | |||||
iobj = | |||||
_Const(index, py::cast((mgb::DType)dtype::Bool()), | |||||
getattr(tensor, "device"), tensor); | |||||
} else { | |||||
iobj = py::reinterpret_borrow<py::object>(index); | |||||
} | |||||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||||
std::vector<PyObject*> p; | |||||
p.resize(3); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
p[2] = iobj.ptr(); | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
return ret; | |||||
} | |||||
py::tuple _remove_ellipsis(py::object tensor, py::tuple tuple_val) { | |||||
size_t tuple_size = tuple_val.size(); | |||||
size_t ndim_sum = 0, cur_sum = 0; | |||||
int pos = -1; | |||||
bool has_unknown_ndim_bool_index = false; | |||||
for (size_t i = 0; i < tuple_size; ++i) { | |||||
py::object handle = tuple_val[i]; | |||||
if (handle.ptr() == Py_Ellipsis) { | |||||
pos = static_cast<int>(i); | |||||
for (size_t j = 0; j < i; ++j) { | |||||
py::object t = tuple_val[j]; | |||||
if (t.ptr() == Py_Ellipsis) { | |||||
throw py::index_error("only one ellipsis is allowed."); | |||||
} | |||||
} | |||||
} else { | |||||
size_t ndim_incr = 1; | |||||
if (hasattr(handle, "dtype") && is_bool_dtype(handle.ptr()) && | |||||
hasattr(handle, "ndim")) { | |||||
py::object ndim = getattr(handle, "ndim"); | |||||
if (PyLong_Check(ndim.ptr())) { | |||||
ndim_incr = PyLong_AsLong(ndim.ptr()); | |||||
} else { | |||||
has_unknown_ndim_bool_index = true; | |||||
} | |||||
} | |||||
cur_sum += ndim_incr; | |||||
} | |||||
} | |||||
if (pos == -1) { | |||||
return tuple_val; | |||||
} else { | |||||
if (has_unknown_ndim_bool_index) { | |||||
throw py::index_error( | |||||
"does not support bool index with unknown shape when using " | |||||
"Ellipsis."); | |||||
} | |||||
try { | |||||
ndim_sum = getattr(tensor, "ndim").cast<size_t>(); | |||||
} catch (py::error_already_set& err) { | |||||
throw py::index_error( | |||||
"does not support Ellipsis when tensor's ndim is unknown."); | |||||
} | |||||
py::tuple ret(ndim_sum - cur_sum + tuple_size - 1); | |||||
size_t idx = 0; | |||||
for (size_t i = 0; i < tuple_size; ++i) { | |||||
if (i == pos) { | |||||
for (size_t j = cur_sum; j < ndim_sum; ++j) { | |||||
ret[idx++] = PySlice_New(NULL, NULL, NULL); | |||||
} | |||||
} else { | |||||
ret[idx++] = tuple_val[i]; | |||||
} | |||||
} | |||||
return ret; | |||||
} | |||||
} | |||||
py::tuple _expand_bool_dim(py::object tensor, py::tuple tuple_val) { | |||||
py::tuple cur_shape = _make_shape_tuple(py::handle(getattr(tensor, "shape"))); | |||||
py::list new_tuple_val(0); | |||||
size_t offset = 0; | |||||
size_t tdim = 0; | |||||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
py::handle k = tuple_val[i]; | |||||
if (is_bool_dtype(k.ptr())) { | |||||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
if (ndim > 1) { | |||||
py::tuple ishape = _make_shape_tuple(py::handle(getattr(k, "shape"))); | |||||
for (size_t j = 0; j < ndim; ++j) { | |||||
if (cur_shape[tdim + j - offset].cast<size_t>() != | |||||
ishape[j].cast<size_t>()) { | |||||
std::string msg = | |||||
"boolean index did not match tensor along dimension " + | |||||
std::to_string(tdim + j) + "; dimension is " + | |||||
std::to_string( | |||||
cur_shape[tdim + j - offset].cast<size_t>()) + | |||||
" but corresponding boolean dimension is " + | |||||
std::to_string(ishape[j].cast<size_t>()); | |||||
throw py::index_error(msg.c_str()); | |||||
} | |||||
} | |||||
py::object new_k = getattr(k, "reshape")(-1); | |||||
py::object kshape = getattr(new_k, "shape"); | |||||
py::list new_shape(0); | |||||
PyObject* sym = PyObject_CallObject(cpp_use_symbolic_shape, nullptr); | |||||
bool is_sym = (sym == Py_True); | |||||
Py_XDECREF(sym); | |||||
if (is_sym) { | |||||
py::object tshape = getattr(tensor, "shape"); | |||||
for (size_t j = 0; j < i; ++j) { | |||||
new_shape.append(tshape[py::int_(j)]); | |||||
} | |||||
new_shape.append(kshape[py::int_(0)]); | |||||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
new_shape.append(cur_shape[j]); | |||||
} | |||||
py::tuple args = py::make_tuple(new_shape); | |||||
PyObject* shape_tensor = | |||||
PyObject_CallObject(cpp_astensor1d, args.ptr()); | |||||
py::object reshape_func = getattr(tensor, "reshape"); | |||||
Py_INCREF(shape_tensor); | |||||
PyObject* Args = PyTuple_New(1); | |||||
PyTuple_SetItem(Args, 0, shape_tensor); | |||||
PyObject* new_tensor = | |||||
PyObject_CallObject(reshape_func.ptr(), Args); | |||||
Py_XDECREF(Args); | |||||
tensor = py::reinterpret_steal<py::object>(new_tensor); | |||||
cur_shape = _make_shape_tuple(py::handle(shape_tensor)); | |||||
Py_XDECREF(shape_tensor); | |||||
} else { | |||||
for (size_t j = 0; j < i; ++j) { | |||||
new_shape.append(cur_shape[j]); | |||||
} | |||||
new_shape.append(py::reinterpret_borrow<py::tuple>(kshape)[0]); | |||||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||||
new_shape.append(cur_shape[j]); | |||||
} | |||||
cur_shape = new_shape; | |||||
tensor = getattr(tensor, "reshape")(cur_shape); | |||||
} | |||||
offset++; | |||||
tdim += ndim; | |||||
} | |||||
new_tuple_val.append(k); | |||||
} else { | |||||
new_tuple_val.append(k); | |||||
tdim++; | |||||
} | |||||
} | |||||
return py::make_tuple(tensor, py::reinterpret_borrow<py::tuple>(new_tuple_val)); | |||||
} | |||||
py::tuple _unpack_indexes(py::handle inp_hdl, py::handle idx_hdl) { | |||||
py::object inp = py::reinterpret_borrow<py::object>(inp_hdl); | |||||
py::tuple tuple_val; | |||||
if (py::isinstance<py::tuple>(idx_hdl)) { | |||||
tuple_val = py::reinterpret_borrow<py::tuple>(idx_hdl); | |||||
} else { | |||||
tuple_val = py::make_tuple(idx_hdl); | |||||
} | |||||
bool use_subtensor = true; | |||||
bool need_remove_ellipsis = false; | |||||
bool need_expand_bool_dim = false; | |||||
size_t idx_ndim = 0; | |||||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
py::object k = tuple_val[i]; | |||||
if (k.ptr() == Py_None) { | |||||
throw py::index_error("newaxis is not allowed here"); | |||||
} else if (k.ptr() == Py_Ellipsis) { | |||||
need_remove_ellipsis = true; | |||||
} else { | |||||
if (is_bool_dtype(k.ptr()) && hasattr(k, "ndim")) { | |||||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||||
idx_ndim += ndim; | |||||
if (ndim > 1) { | |||||
need_expand_bool_dim = true; | |||||
} | |||||
} else { | |||||
idx_ndim++; | |||||
} | |||||
} | |||||
} | |||||
try { | |||||
size_t inp_ndim = getattr(inp, "ndim").cast<size_t>(); | |||||
if (idx_ndim > inp_ndim) { | |||||
std::string msg = "too many indices for tensor: tensor is " + | |||||
std::to_string(inp_ndim) + "-dimensional, but " + | |||||
std::to_string(idx_ndim) + " were indexed"; | |||||
throw py::index_error(msg.c_str()); | |||||
} | |||||
} catch (py::error_already_set& err) { | |||||
; // ignore | |||||
} | |||||
if (need_remove_ellipsis) { | |||||
tuple_val = _remove_ellipsis(inp, tuple_val); | |||||
} | |||||
if (need_expand_bool_dim) { | |||||
py::object shape = getattr(inp, "shape"); | |||||
if (shape.ptr() != Py_None) { | |||||
py::tuple ret = _expand_bool_dim(inp, tuple_val); | |||||
inp = ret[0]; | |||||
tuple_val = ret[1]; | |||||
} | |||||
} | |||||
py::list items; | |||||
py::list tensors; | |||||
int cur_axis = -1; | |||||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||||
py::object handle = tuple_val[i]; | |||||
cur_axis++; | |||||
if (!is_scalar(handle.ptr()) && !PySlice_Check(handle.ptr())) { | |||||
use_subtensor = false; | |||||
} | |||||
py::list item; | |||||
item.append(cur_axis); | |||||
auto push = [&](PyObject* v) { | |||||
if (v == Py_None) { | |||||
item.append(false); | |||||
} else { | |||||
item.append(true); | |||||
tensors.append(_get_index(py::reinterpret_borrow<py::object>(v), inp)); | |||||
} | |||||
}; | |||||
if (PySlice_Check(handle.ptr())) { | |||||
PySliceObject* s = (PySliceObject*)handle.ptr(); | |||||
if (s->start == Py_None && s->stop == Py_None && s->step == Py_None) { | |||||
continue; | |||||
} | |||||
push(s->start); | |||||
push(s->stop); | |||||
push(s->step); | |||||
item.append(false); | |||||
} else { | |||||
for (size_t j = 0; j < 3; j++) | |||||
item.append(false); | |||||
push(handle.ptr()); | |||||
} | |||||
items.append(item); | |||||
} | |||||
return py::make_tuple(inp, tensors, items, use_subtensor, need_expand_bool_dim); | |||||
} | |||||
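// Reading aid, not part of the patch: each `item` built above packs one indexed
// axis as {axis, has_begin, has_end, has_step, has_index}; _getitem_cpp and
// _setitem_cpp below convert it into the std::tuple<int8_t, bool, bool, bool, bool>
// handed to Subtensor::make / IndexingMultiAxisVec::make. Illustrative shape:
//
//     struct IndexEntry {
//         int8_t axis;
//         bool has_begin, has_end, has_step, has_index;
//     };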
py::object _getitem_cpp(py::handle inp_hdl, py::handle idx_hdl) { | |||||
py::tuple try_res = _try_cond_take(inp_hdl, idx_hdl); | |||||
if (try_res.size() == 2) { | |||||
return try_res[0]; | |||||
} | |||||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
for (size_t i = 0; i < py_items.size(); ++i) { | |||||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
cpp_items.push_back( | |||||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
} | |||||
static std::shared_ptr<OpDef> op; | |||||
if (up[3].cast<bool>()) { | |||||
op = Subtensor::make(cpp_items); | |||||
} else { | |||||
op = IndexingMultiAxisVec::make(cpp_items); | |||||
} | |||||
std::vector<PyObject*> p; | |||||
p.resize(tensors.size() + 2); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
for (size_t i = 0; i < tensors.size(); ++i) { | |||||
p[i + 2] = tensors[i].ptr(); | |||||
} | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
return ret[0]; | |||||
} | |||||
py::object _setitem_cpp(py::handle inp_hdl, py::handle idx_hdl, py::handle val_hdl) { | |||||
py::object org_shape = getattr(inp_hdl, "shape"); | |||||
py::object val = py::reinterpret_borrow<py::object>(val_hdl); | |||||
if (!TensorWrapper::try_cast(val.ptr()) && !py::isinstance<PySymbolVar>(val)) { | |||||
val = | |||||
_Const(val_hdl, getattr(inp_hdl, "dtype"), getattr(inp_hdl, "device"), | |||||
inp_hdl); | |||||
} | |||||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||||
for (size_t i = 0; i < py_items.size(); ++i) { | |||||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||||
cpp_items.push_back( | |||||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||||
} | |||||
static std::shared_ptr<OpDef> op, set_op; | |||||
if (up[3].cast<bool>()) { | |||||
op = Subtensor::make(cpp_items); | |||||
} else { | |||||
op = IndexingMultiAxisVec::make(cpp_items); | |||||
} | |||||
std::vector<PyObject*> p; | |||||
p.resize(tensors.size() + 2); | |||||
py::object Op = py::cast(op); | |||||
p[0] = Op.ptr(); | |||||
p[1] = tensor.ptr(); | |||||
for (size_t i = 0; i < tensors.size(); ++i) { | |||||
p[i + 2] = tensors[i].ptr(); | |||||
} | |||||
py::tuple ret = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||||
py::object tmp_result = ret[0]; | |||||
try { | |||||
py::object value_tuple_shape = val.attr("_tuple_shape"); | |||||
py::object tmp_result_tuple_shape = tmp_result.attr("_tuple_shape"); | |||||
py::tuple value_shape = py::reinterpret_borrow<py::tuple>(value_tuple_shape); | |||||
py::tuple tmp_result_shape = | |||||
py::reinterpret_borrow<py::tuple>(tmp_result_tuple_shape); | |||||
for (size_t i = 0; i < value_shape.size() && i < tmp_result_shape.size(); ++i) { | |||||
size_t vs = value_shape[value_shape.size() - i - 1].cast<size_t>(); | |||||
size_t ts = | |||||
tmp_result_shape[tmp_result_shape.size() - i - 1].cast<size_t>(); | |||||
if (vs != 1 && vs != ts) { | |||||
std::string lhs = "", rhs = ""; | |||||
for (size_t j = 0; j < tmp_result_shape.size(); ++j) { | |||||
lhs += std::to_string(tmp_result_shape[j].cast<size_t>()); | |||||
if (j) | |||||
lhs += ","; | |||||
} | |||||
for (size_t j = 0; j < value_shape.size(); ++j) { | |||||
rhs += std::to_string(value_shape[j].cast<size_t>()); | |||||
if (j) | |||||
rhs += ","; | |||||
} | |||||
throw py::value_error( | |||||
"cannot copy tensor with shape (" + rhs + | |||||
") to subtensor with shape (" + lhs + ")"); | |||||
} | |||||
} | |||||
} catch (py::error_already_set& err) { | |||||
; | |||||
} | |||||
py::object broadcast_func = getattr(val, "_broadcast"); | |||||
PyObject* Args = PyTuple_New(1); | |||||
PyTuple_SetItem(Args, 0, getattr(tmp_result, "shape").release().ptr()); | |||||
PyObject* new_val = PyObject_CallObject(broadcast_func.ptr(), Args); | |||||
Py_XDECREF(Args); | |||||
val = py::reinterpret_steal<py::object>(new_val); | |||||
if (up[3].cast<bool>()) { | |||||
set_op = SetSubtensor::make(cpp_items); | |||||
} else { | |||||
set_op = IndexingSetMultiAxisVec::make(cpp_items); | |||||
} | |||||
std::vector<PyObject*> q; | |||||
q.resize(tensors.size() + 3); | |||||
py::object Set_Op = py::cast(set_op); | |||||
q[0] = Set_Op.ptr(); | |||||
q[1] = tensor.ptr(); | |||||
q[2] = val.ptr(); | |||||
for (size_t i = 0; i < tensors.size(); ++i) { | |||||
q[i + 3] = tensors[i].ptr(); | |||||
} | |||||
py::tuple result = | |||||
py::reinterpret_steal<py::object>(py_apply(NULL, q.data(), q.size())); | |||||
py::object res = result[0]; | |||||
if (up[4].cast<bool>()) { | |||||
py::object reshape_func = getattr(res, "reshape"); | |||||
PyObject* Args = PyTuple_New(1); | |||||
PyTuple_SetItem(Args, 0, org_shape.release().ptr()); | |||||
PyObject* new_tensor = PyObject_CallObject(reshape_func.ptr(), Args); | |||||
Py_XDECREF(Args); | |||||
res = py::reinterpret_steal<py::object>(new_tensor); | |||||
} | |||||
return res; | |||||
} | |||||
PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
try { | |||||
return _make_shape_tuple(py::handle(args[0])).release().ptr(); | |||||
} | |||||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
} | |||||
PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
try { | |||||
return _getitem_cpp(py::handle(args[0]), py::handle(args[1])).release().ptr(); | |||||
} | |||||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
} | |||||
PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||||
try { | |||||
return _setitem_cpp( | |||||
py::handle(args[0]), py::handle(args[1]), py::handle(args[2])) | |||||
.release() | |||||
.ptr(); | |||||
} | |||||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||||
} | |||||
} // namespace mgb::imperative::python
@@ -0,0 +1,11 @@ | |||||
#pragma once | |||||
namespace mgb::imperative::python { | |||||
PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs); | |||||
PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs); | |||||
PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs); | |||||
} // namespace mgb::imperative::python
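The header above only declares the three Python entry points; their registration is not shown in this diff. Presumably they end up in a PyMethodDef table built with the MGE_PY_INTERFACE/METH_FASTCALL macro quoted earlier from tensor.cpp, roughly like this (illustrative sketch; the array name is hypothetical):

// Sketch only: assumes <Python.h> and the MGE_PY_INTERFACE macro from tensor.cpp.
static PyMethodDef tensor_utils_methods[] = {
        MGE_PY_INTERFACE(make_shape_tuple, make_shape_tuple),
        MGE_PY_INTERFACE(getitem_cpp, getitem_cpp),
        MGE_PY_INTERFACE(setitem_cpp, setitem_cpp),
        {nullptr, nullptr, 0, nullptr},
};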
@@ -642,7 +642,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr) { | |||||
m_dtr.update_used_time(dest); | m_dtr.update_used_time(dest); | ||||
MGB_RECORD_EVENT( | MGB_RECORD_EVENT( | ||||
TensorProduceEvent, dest->id, ptr->layout(), ptr->comp_node(), | TensorProduceEvent, dest->id, ptr->layout(), ptr->comp_node(), | ||||
ptr->dev_tensor().raw_ptr()); | |||||
ptr->dev_tensor(false).raw_ptr()); | |||||
// update tensor desc for static infer | // update tensor desc for static infer | ||||
if (dest->desc.layout.ndim) { | if (dest->desc.layout.ndim) { | ||||
mgb_assert( | mgb_assert( | ||||
@@ -730,10 +730,20 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) { | |||||
inputs, apply_functor, const_functor); | inputs, apply_functor, const_functor); | ||||
return outputs; | return outputs; | ||||
} | } | ||||
return OpDef::apply_on_physical_tensor(def, inputs, output_descs, validated); | |||||
// Check Input Layout | |||||
// Get the input layout constraints, and if the constraint is not satisfied | |||||
// inplace update the layout and blob to make the tensor contiguous | |||||
auto&& constraints = OpDef::get_input_layout_constraint(def, inputs); | |||||
for (size_t idx = 0; idx < inputs.size(); ++idx) { | |||||
auto&& layout_checker = constraints[idx]; | |||||
if (layout_checker) { | |||||
inputs[idx]->to_contiguous_inplace(layout_checker); | |||||
} | |||||
} | |||||
return OpDef::apply_on_physical_tensor( | |||||
def, std::move(inputs), output_descs, validated); | |||||
}; | }; | ||||
MGB_RECORD_EVENT(OpExecuteEvent, apply_id, {}, reason); | MGB_RECORD_EVENT(OpExecuteEvent, apply_id, {}, reason); | ||||
// Begin profiling operator | |||||
SmallVector<std::pair<CompNode, uint64_t>> kernels; | SmallVector<std::pair<CompNode, uint64_t>> kernels; | ||||
if (profiling_device) { | if (profiling_device) { | ||||
// Collecting devices | // Collecting devices | ||||
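The new block above asks the op for per-input layout constraints and, where a constraint callback exists, hands it to to_contiguous_inplace so the tensor can relayout itself before the kernel runs. A type-free sketch of the same control flow (toy types; the callback is assumed to return true when the current layout is acceptable):

#include <cstddef>
#include <functional>
#include <vector>

struct ToyTensor {
    bool contiguous = false;
    // Stand-in for TensorPtr::to_contiguous_inplace(checker): relayout only if
    // the checker rejects the current layout.
    void to_contiguous_inplace(const std::function<bool(const ToyTensor&)>& ok) {
        if (!ok(*this))
            contiguous = true;  // pretend we copied into a contiguous blob
    }
};

void check_input_layouts(
        std::vector<ToyTensor>& inputs,
        const std::vector<std::function<bool(const ToyTensor&)>>& constraints) {
    for (std::size_t idx = 0; idx < inputs.size(); ++idx) {
        // an empty std::function means "no constraint for this input"
        if (constraints[idx])
            inputs[idx].to_contiguous_inplace(constraints[idx]);
    }
}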
@@ -1 +1,2 @@ | |||||
#include "../../../src/core/impl/graph/cg_impl.h" | #include "../../../src/core/impl/graph/cg_impl.h" | ||||
#include "../../../src/core/impl/graph/var_node_mem_mgr.h"
@@ -60,6 +60,11 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> OpDef::infer_output_attrs_falli | |||||
return def.trait()->infer_output_attrs_fallible(def, inputs); | return def.trait()->infer_output_attrs_fallible(def, inputs); | ||||
} | } | ||||
SmallVector<VarNode::LayoutConstraintCallback> OpDef::get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
return def.trait()->get_input_layout_constraint(def, inputs); | |||||
} | |||||
EncodedSubgraph OpDef::make_backward_graph( | EncodedSubgraph OpDef::make_backward_graph( | ||||
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs, | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs, | ||||
const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
@@ -47,6 +47,10 @@ void OpMethFallbackByProxyGraph::impl( | |||||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | ||||
func.Base::operator=(proxy_graph_detail::infer_output_attrs_fallible); | func.Base::operator=(proxy_graph_detail::infer_output_attrs_fallible); | ||||
} | } | ||||
void OpMethFallbackByProxyGraph::impl( | |||||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint) { | |||||
func.Base::operator=(proxy_graph_detail::get_input_layout_constraint); | |||||
} | |||||
void OpMethFallbackByProxyGraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | void OpMethFallbackByProxyGraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | ||||
func.Base::operator=(proxy_graph_detail::make_backward_graph); | func.Base::operator=(proxy_graph_detail::make_backward_graph); | ||||
} | } | ||||
@@ -63,6 +67,10 @@ void OpMethFallbackFromSubgraph::impl( | |||||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | ||||
func.Base::operator=(subgraph_detail::infer_output_attrs_fallible); | func.Base::operator=(subgraph_detail::infer_output_attrs_fallible); | ||||
} | } | ||||
void OpMethFallbackFromSubgraph::impl( | |||||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint) { | |||||
func.Base::operator=(subgraph_detail::get_input_layout_constraint); | |||||
} | |||||
void OpMethFallbackFromSubgraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | void OpMethFallbackFromSubgraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | ||||
func.Base::operator=(subgraph_detail::make_backward_graph); | func.Base::operator=(subgraph_detail::make_backward_graph); | ||||
} | } | ||||
@@ -73,6 +73,9 @@ OpMethType(ApplyOnVarNode, | |||||
OpMethType(InferOutputAttrsFallible, | OpMethType(InferOutputAttrsFallible, | ||||
decltype(OpDef::infer_output_attrs_fallible)); | decltype(OpDef::infer_output_attrs_fallible)); | ||||
OpMethType(GetInputLayoutConstraint, | |||||
decltype(OpDef::get_input_layout_constraint)); | |||||
OpMethType(GradMaker, | OpMethType(GradMaker, | ||||
decltype(OpDef::make_backward_graph)); | decltype(OpDef::make_backward_graph)); | ||||
@@ -119,6 +122,8 @@ struct OpMethFallbackByProxyGraph : OpMethImplBase { | |||||
static void impl(ApplyOnPhysicalTensor& func, op_meth_tag::ApplyOnPhysicalTensor); | static void impl(ApplyOnPhysicalTensor& func, op_meth_tag::ApplyOnPhysicalTensor); | ||||
static void impl( | static void impl( | ||||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | ||||
static void impl( | |||||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint); | |||||
static void impl(GradMaker& func, op_meth_tag::GradMaker); | static void impl(GradMaker& func, op_meth_tag::GradMaker); | ||||
}; | }; | ||||
@@ -128,6 +133,8 @@ struct OpMethFallbackFromSubgraph : OpMethImplBase { | |||||
static void impl(ApplyOnVarNode& func, op_meth_tag::ApplyOnVarNode); | static void impl(ApplyOnVarNode& func, op_meth_tag::ApplyOnVarNode); | ||||
static void impl( | static void impl( | ||||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | ||||
static void impl( | |||||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint); | |||||
static void impl(GradMaker& func, op_meth_tag::GradMaker); | static void impl(GradMaker& func, op_meth_tag::GradMaker); | ||||
}; | }; | ||||
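Both fallback structs follow the same pattern: every OpTrait capability is a named function slot, and when an op does not register get_input_layout_constraint the fallback assigns a default implementation to the slot, selected by tag overload resolution. A condensed sketch of that mechanism with toy types:

#include <cassert>
#include <functional>

namespace op_meth_tag {
struct GetInputLayoutConstraint {};
}  // namespace op_meth_tag

// A trait slot: a named std::function with its Base exposed for assignment.
struct GetInputLayoutConstraint : std::function<int(int)> {
    using Base = std::function<int(int)>;
};

struct OpMethFallback {
    static void impl(GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint) {
        func.Base::operator=([](int) { return 0; });  // default: no constraints
    }
};

int main() {
    GetInputLayoutConstraint slot;  // op did not register an implementation
    if (!slot)
        OpMethFallback::impl(slot, op_meth_tag::GetInputLayoutConstraint{});
    assert(slot(7) == 0);
}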
@@ -179,6 +186,7 @@ struct OpTrait { | |||||
ApplyOnDeviceTensorND apply_on_device_tensornd; | ApplyOnDeviceTensorND apply_on_device_tensornd; | ||||
ApplyOnVarNode apply_on_var_node; | ApplyOnVarNode apply_on_var_node; | ||||
InferOutputAttrsFallible infer_output_attrs_fallible; | InferOutputAttrsFallible infer_output_attrs_fallible; | ||||
GetInputLayoutConstraint get_input_layout_constraint; | |||||
GradMaker make_backward_graph; | GradMaker make_backward_graph; | ||||
Props props; | Props props; | ||||
HashFunc hash; | HashFunc hash; | ||||
@@ -199,6 +207,7 @@ struct OpTrait { | |||||
cb(apply_on_device_tensornd) \ | cb(apply_on_device_tensornd) \ | ||||
cb(apply_on_var_node) \ | cb(apply_on_var_node) \ | ||||
cb(infer_output_attrs_fallible) \ | cb(infer_output_attrs_fallible) \ | ||||
cb(get_input_layout_constraint) \ | |||||
cb(make_backward_graph) \ | cb(make_backward_graph) \ | ||||
cb(props) \ | cb(props) \ | ||||
cb(hash) \ | cb(hash) \ | ||||
@@ -117,7 +117,7 @@ void InputCallback::scn_do_execute() { | |||||
layout.init_contiguous_stride(); | layout.init_contiguous_stride(); | ||||
dev_tensor.reset(dev_tensor.storage(), layout); | dev_tensor.reset(dev_tensor.storage(), layout); | ||||
} | } | ||||
output(0)->reset_dev_tensor_from_tensor(dev_tensor); | |||||
output(0)->force_assign_dev_tensor_from_tensor(dev_tensor); | |||||
} | } | ||||
cg::OperatorNodeBase* InputCallback::shallow_copy( | cg::OperatorNodeBase* InputCallback::shallow_copy( | ||||
@@ -311,7 +311,7 @@ cg::OperatorNodeBase::NodeProp* MutableTensor::do_make_node_prop() const { | |||||
} | } | ||||
void MutableTensor::scn_do_execute() { | void MutableTensor::scn_do_execute() { | ||||
output(0)->reset_dev_tensor_from_tensor(*m_dev_tensor); | |||||
output(0)->force_assign_dev_tensor_from_tensor(*m_dev_tensor); | |||||
} | } | ||||
void MutableTensor::init_output_static_infer_desc() { | void MutableTensor::init_output_static_infer_desc() { | ||||
@@ -83,28 +83,18 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | |||||
SmallVector<TensorPtr> apply_on_physical_tensor( | SmallVector<TensorPtr> apply_on_physical_tensor( | ||||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | const OpDef& def, const SmallVector<TensorPtr>& inputs, | ||||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | ||||
auto& input = inputs[0]; | |||||
TensorShape target_shape; | |||||
if (validated) { | |||||
target_shape = output_descs[0].layout; | |||||
} else { | |||||
cg::copy_tensor_value_to_shape( | |||||
target_shape, inputs[1]->get_value().proxy_to_default_cpu()); | |||||
} | |||||
TensorPtr output = Tensor::make( | |||||
TensorLayout(target_shape, input->dtype()), input->comp_node()); | |||||
if (output->layout().is_empty()) { | |||||
return {output}; | |||||
} | |||||
if (input->shape().eq_shape(output->shape())) { | |||||
mgb_assert(input->layout().eq_layout(output->layout())); | |||||
output->dev_tensor().copy_from_fixlayout(input->dev_tensor()); | |||||
} else { | |||||
TensorLayout input_layout = input->layout().broadcast(output->shape()); | |||||
output->dev_tensor().copy_from_fixlayout( | |||||
input->dev_tensor().sub(SubTensorSpec::make_from_layout(input_layout))); | |||||
} | |||||
return {output}; | |||||
def.cast_final_safe<Broadcast>(); | |||||
size_t nr_inp = inputs.size(); | |||||
mgb_assert(nr_inp == 2, "Broadcast expects 2 inputs; got %lu actually", nr_inp); | |||||
auto&& src = inputs[0]; | |||||
auto&& tshp_nd = inputs[1]; | |||||
auto slayout = src->layout(); | |||||
TensorShape tshp; | |||||
cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | |||||
TensorLayout tlayout = slayout.broadcast(tshp); | |||||
// memory forward | |||||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
} | } | ||||
OP_TRAIT_REG(Broadcast, Broadcast, opr::Broadcast) | OP_TRAIT_REG(Broadcast, Broadcast, opr::Broadcast) | ||||
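
The new Broadcast path above never copies data: `slayout.broadcast(tshp)` yields a layout whose broadcast axes carry stride 0, and the result simply aliases the source blob at the same offset. A minimal standalone sketch of that layout transform, assuming a hypothetical `Layout` struct (plain C++, not the MegEngine `TensorLayout` API):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Hypothetical minimal layout: one shape and one stride entry per axis.
    struct Layout {
        std::vector<long> shape, stride;
    };

    // Expand `src` to `tshape`: size-1 axes and newly prepended axes get stride 0,
    // so every index along them maps to the same element of the source storage.
    Layout broadcast_layout(const Layout& src, const std::vector<long>& tshape) {
        Layout out{tshape, std::vector<long>(tshape.size(), 0)};
        size_t off = tshape.size() - src.shape.size();  // axes prepended on the left
        for (size_t i = 0; i < src.shape.size(); ++i) {
            if (src.shape[i] == tshape[off + i]) {
                out.stride[off + i] = src.stride[i];
            } else {
                assert(src.shape[i] == 1);  // only size-1 axes may be broadcast
            }
        }
        return out;
    }

    int main() {
        Layout src{{3, 1}, {1, 1}};                     // a contiguous (3, 1) tensor
        Layout dst = broadcast_layout(src, {2, 3, 4});  // viewed as (2, 3, 4)
        for (size_t i = 0; i < dst.shape.size(); ++i)
            std::printf("axis %zu: shape=%ld stride=%ld\n", i, dst.shape[i], dst.stride[i]);
        // broadcast axes report stride 0; no element was copied
    }

Because broadcast axes share storage, a consumer that genuinely needs a contiguous input has to relayout first; that is what the layout-constraint machinery introduced elsewhere in this patch takes care of.
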
@@ -184,10 +174,6 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
auto&& tshp_nd = inputs[1]; | auto&& tshp_nd = inputs[1]; | ||||
auto slayout = src->layout(); | auto slayout = src->layout(); | ||||
if (validated) { | |||||
return {Tensor::make(src->blob(), 0, output_descs[0].layout)}; | |||||
} | |||||
TensorShape tshp; | TensorShape tshp; | ||||
cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | ||||
if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | ||||
@@ -195,13 +181,39 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
tshp[op_def.axis] = 1; | tshp[op_def.axis] = 1; | ||||
tshp[op_def.axis] = src->layout().total_nr_elems() / tshp.total_nr_elems(); | tshp[op_def.axis] = src->layout().total_nr_elems() / tshp.total_nr_elems(); | ||||
} | } | ||||
return {Tensor::make(src->blob(), 0, slayout.reshape(tshp))}; | |||||
TensorLayout tlayout; | |||||
mgb_assert(slayout.try_reshape(tlayout, tshp)); | |||||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
} | |||||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
auto&& op_def = def.cast_final_safe<Reshape>(); | |||||
SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||||
layout_checker[0] = [&](const TensorLayout& layout) { | |||||
TensorShape tshp; | |||||
TensorLayout ret; | |||||
cg::copy_tensor_value_to_shape( | |||||
tshp, inputs[1]->get_value().proxy_to_default_cpu()); | |||||
if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | |||||
mgb_assert(tshp[op_def.axis] == -1); | |||||
tshp[op_def.axis] = 1; | |||||
tshp[op_def.axis] = layout.total_nr_elems() / tshp.total_nr_elems(); | |||||
} | |||||
if (layout.try_reshape(ret, tshp)) { | |||||
return true; | |||||
} else { | |||||
return false; | |||||
} | |||||
}; | |||||
return layout_checker; | |||||
} | } | ||||
OP_TRAIT_REG(Reshape, Reshape) | OP_TRAIT_REG(Reshape, Reshape) | ||||
.apply_on_var_node(apply_on_var_node) | .apply_on_var_node(apply_on_var_node) | ||||
.infer_output_attrs_fallible(infer_output_attrs_fallible) | .infer_output_attrs_fallible(infer_output_attrs_fallible) | ||||
.apply_on_physical_tensor(apply_on_physical_tensor) | .apply_on_physical_tensor(apply_on_physical_tensor) | ||||
.get_input_layout_constraint(get_input_layout_constraint) | |||||
.fallback(); | .fallback(); | ||||
} // namespace reshape | } // namespace reshape | ||||
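
The `INVALID_AXIS` branch above resolves a wildcard entry in the requested shape before attempting the reshape: the axis is first asserted to be -1, then set so that the total element count is preserved. A standalone sketch of that arithmetic under hypothetical names (not the MegEngine API):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Resolve at most one -1 ("wildcard") entry of the target shape so that the
    // reshape preserves the element count of the source shape.
    std::vector<long> resolve_reshape(const std::vector<long>& src_shape,
                                      std::vector<long> target) {
        long total = 1;
        for (long s : src_shape) total *= s;
        long known = 1;
        int wildcard = -1;
        for (size_t i = 0; i < target.size(); ++i) {
            if (target[i] == -1) {
                assert(wildcard == -1 && "at most one wildcard axis");
                wildcard = static_cast<int>(i);
            } else {
                known *= target[i];
            }
        }
        if (wildcard >= 0) {
            assert(total % known == 0);
            target[wildcard] = total / known;
        }
        long check = 1;
        for (long t : target) check *= t;
        assert(check == total && "reshape must preserve the element count");
        return target;
    }

    int main() {
        auto out = resolve_reshape({2, 3, 4}, {4, -1});
        std::printf("(%ld, %ld)\n", out[0], out[1]);  // (4, 6)
    }
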
@@ -220,12 +220,22 @@ cg::OperatorNodeBase* apply_inplace_add_on_var_node( | |||||
SmallVector<TensorPtr> apply_inplace_add_on_physical_tensor( | SmallVector<TensorPtr> apply_inplace_add_on_physical_tensor( | ||||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | const OpDef& def, const SmallVector<TensorPtr>& inputs, | ||||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | ||||
mgb_assert( | |||||
inputs[0]->blob().use_count() == 1 && inputs[0]->blob()->storage().unique(), | |||||
"This inplace modification may change the elements of other tensors. " | |||||
"Please set MEGENGINE_INPLACE_UPDATE to 0 to ensure the program runs " | |||||
"correctly."); | |||||
auto dest = inputs[0], delta = inputs[1], alpha = inputs[2], beta = inputs[3]; | auto dest = inputs[0], delta = inputs[1], alpha = inputs[2], beta = inputs[3]; | ||||
if (!(inputs[0]->blob().unique() && inputs[0]->blob()->storage().unique())) { | |||||
mgb_log_warn( | |||||
"This inplace modification may change the elements of other tensors. " | |||||
"Fallback to non-inplace update."); | |||||
DeviceTensorStorage storage; | |||||
storage.reset(dest->comp_node(), dest->blob()->size(), dest->blob()->storage()); | |||||
storage = storage.sub(dest->offset()); | |||||
DeviceTensorND dv; | |||||
dv.reset(storage, dest->layout()); | |||||
DeviceTensorND dv_new; | |||||
dv_new.copy_from(dv); | |||||
dest = Tensor::make(dv_new); | |||||
} | |||||
auto tensor_to_scalar = [](const TensorPtr& tensor) -> float { | auto tensor_to_scalar = [](const TensorPtr& tensor) -> float { | ||||
return *tensor->get_value().ptr<float>(); | return *tensor->get_value().ptr<float>(); | ||||
}; | }; | ||||
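
The new branch falls back to copy-on-write whenever the destination blob or its storage is shared, instead of asserting as the old code did. A rough standalone illustration of the ownership test and the private copy (plain C++ with a hypothetical `Buffer` alias, not the MegEngine Blob API):

    #include <cstdio>
    #include <memory>
    #include <vector>

    using Buffer = std::vector<float>;

    // Add `delta` into `dest` in place when we exclusively own the buffer;
    // otherwise copy first so other holders of the storage are not disturbed.
    void add_inplace_or_copy(std::shared_ptr<Buffer>& dest, const Buffer& delta) {
        if (dest.use_count() != 1) {
            std::puts("buffer is shared, falling back to copy-on-write");
            dest = std::make_shared<Buffer>(*dest);  // private copy
        }
        for (size_t i = 0; i < dest->size(); ++i)
            (*dest)[i] += delta[i];
    }

    int main() {
        auto a = std::make_shared<Buffer>(Buffer{1.f, 2.f, 3.f});
        auto alias = a;                        // someone else also holds the storage
        add_inplace_or_copy(a, {10.f, 10.f, 10.f});
        std::printf("a[0]=%g alias[0]=%g\n", (*a)[0], (*alias)[0]);  // 11 and 1
    }
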
@@ -54,7 +54,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | const OpDef& def, const SmallVector<TensorPtr>& inputs, | ||||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | ||||
if (memory_forward_success(def, inputs)) { | if (memory_forward_success(def, inputs)) { | ||||
return {Tensor::make(inputs[0]->blob(), 0, inputs[0]->layout())}; | |||||
return {Tensor::make( | |||||
inputs[0]->blob(), inputs[0]->offset(), inputs[0]->layout())}; | |||||
} | } | ||||
return proxy_graph_detail::apply_on_physical_tensor( | return proxy_graph_detail::apply_on_physical_tensor( | ||||
def, inputs, output_descs, validated); | def, inputs, output_descs, validated); | ||||
@@ -73,11 +74,21 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | |||||
return {output_descs, validated}; | return {output_descs, validated}; | ||||
} | } | ||||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||||
layout_checker[0] = [](const TensorLayout& layout) { | |||||
return layout.is_contiguous(); | |||||
}; | |||||
return layout_checker; | |||||
} | |||||
OP_TRAIT_REG(Reduce, Reduce, opr::Reduce) | OP_TRAIT_REG(Reduce, Reduce, opr::Reduce) | ||||
.make_from_op_node(make_from_op_node) | .make_from_op_node(make_from_op_node) | ||||
.apply_on_var_node(apply_on_var_node) | .apply_on_var_node(apply_on_var_node) | ||||
.apply_on_physical_tensor(apply_on_physical_tensor) | .apply_on_physical_tensor(apply_on_physical_tensor) | ||||
.infer_output_attrs_fallible(infer_output_attrs_fallible) | .infer_output_attrs_fallible(infer_output_attrs_fallible) | ||||
.get_input_layout_constraint(get_input_layout_constraint) | |||||
.fallback(); | .fallback(); | ||||
} // namespace reduce | } // namespace reduce | ||||
} // namespace | } // namespace | ||||
@@ -594,6 +594,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible<Dro | |||||
return {dests, true}; | return {dests, true}; | ||||
} | } | ||||
template <typename Op> | |||||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||||
return layout_checker; | |||||
} | |||||
} // anonymous namespace | } // anonymous namespace | ||||
Handle new_handle(CompNode comp_node, uint64_t seed) { | Handle new_handle(CompNode comp_node, uint64_t seed) { | ||||
@@ -622,6 +629,7 @@ CompNode get_rng_handle_compnode(Handle handle) { | |||||
.apply_on_var_node(apply_on_var_node<NAME, Output>) \ | .apply_on_var_node(apply_on_var_node<NAME, Output>) \ | ||||
.apply_on_physical_tensor(apply_on_physical_tensor<NAME>) \ | .apply_on_physical_tensor(apply_on_physical_tensor<NAME>) \ | ||||
.infer_output_attrs_fallible(infer_output_attrs_fallible<NAME>) \ | .infer_output_attrs_fallible(infer_output_attrs_fallible<NAME>) \ | ||||
.get_input_layout_constraint(get_input_layout_constraint<NAME>) \ | |||||
.fallback(); \ | .fallback(); \ | ||||
} | } | ||||
@@ -60,9 +60,55 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||||
return opr::Dimshuffle::make(inputs[0], ds.pattern, 0UL, config); | return opr::Dimshuffle::make(inputs[0], ds.pattern, 0UL, config); | ||||
} | } | ||||
SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||||
auto&& ds = static_cast<const Dimshuffle&>(def); | |||||
mgb_assert( | |||||
ds.pattern.size() <= TensorShape::MAX_NDIM, | |||||
"Dimshuffle pattern exceeds max length of %zd", TensorShape::MAX_NDIM); | |||||
size_t nr_inp = inputs.size(); | |||||
mgb_assert(nr_inp == 1, "Dimshuffle expects 1 inputs; got %lu actually", nr_inp); | |||||
auto&& src = inputs[0]; | |||||
auto inp_layout = src->layout(); | |||||
size_t pattern_ndim = *std::max_element(ds.pattern.begin(), ds.pattern.end()) + 1; | |||||
mgb_assert( | |||||
inp_layout.ndim == pattern_ndim, | |||||
"input ndim mismatch for Dimshuffle: expect=%zd actual=%zd", pattern_ndim, | |||||
inp_layout.ndim); | |||||
TensorLayout out_layout{inp_layout.dtype}; | |||||
out_layout.ndim = ds.pattern.size(); | |||||
size_t idx = 0; | |||||
bool input_used[TensorLayout::MAX_NDIM] = {0}; | |||||
for (auto i : ds.pattern) { | |||||
if (i < 0) { | |||||
out_layout.shape[idx] = 1; | |||||
out_layout.stride[idx] = 1; | |||||
} else { | |||||
input_used[i] = true; | |||||
out_layout.shape[idx] = inp_layout.shape[i]; | |||||
out_layout.stride[idx] = inp_layout.stride[i]; | |||||
} | |||||
++idx; | |||||
} | |||||
if (out_layout.is_contiguous()) { | |||||
out_layout.init_contiguous_stride(); | |||||
} | |||||
for (size_t i = 0; i < pattern_ndim; ++i) { | |||||
mgb_assert( | |||||
input_used[i] || inp_layout.shape[i] == 1, | |||||
"non-1 dim discarded in Dimshuffle: ishp=%s dim=%zd", | |||||
inp_layout.megdnn::TensorShape::to_string().c_str(), i); | |||||
} | |||||
// memory forward | |||||
return {Tensor::make(src->blob(), src->offset(), out_layout)}; | |||||
} | |||||
OP_TRAIT_REG(Dimshuffle, Dimshuffle, opr::Dimshuffle) | OP_TRAIT_REG(Dimshuffle, Dimshuffle, opr::Dimshuffle) | ||||
.make_from_op_node(make_from_op_node) | .make_from_op_node(make_from_op_node) | ||||
.apply_on_var_node(apply_on_var_node) | .apply_on_var_node(apply_on_var_node) | ||||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||||
.fallback(); | .fallback(); | ||||
} // namespace dimshuffle | } // namespace dimshuffle | ||||
} // namespace | } // namespace | ||||
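
Dimshuffle's new `apply_on_physical_tensor` above is another pure layout transform: the output layout permutes the input's shape/stride entries, inserts extent-1 axes for pattern entries of -1, and then aliases the source blob. A minimal standalone sketch of that permutation, again with a hypothetical `Layout` struct rather than MegEngine code:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct Layout {
        std::vector<long> shape, stride;
    };

    // Build the output layout by permuting shape/stride entries of the input;
    // a pattern entry of -1 inserts a new axis of extent 1.
    Layout dimshuffle_layout(const Layout& in, const std::vector<int>& pattern) {
        Layout out;
        for (int p : pattern) {
            if (p < 0) {                    // new axis
                out.shape.push_back(1);
                out.stride.push_back(1);
            } else {                        // forwarded axis: same storage, no copy
                assert(static_cast<size_t>(p) < in.shape.size());
                out.shape.push_back(in.shape[p]);
                out.stride.push_back(in.stride[p]);
            }
        }
        return out;
    }

    int main() {
        Layout nchw{{2, 3, 4, 5}, {60, 20, 5, 1}};        // contiguous NCHW
        Layout nhwc = dimshuffle_layout(nchw, {0, 2, 3, 1});
        for (size_t i = 0; i < nhwc.shape.size(); ++i)
            std::printf("axis %zu: shape=%ld stride=%ld\n", i, nhwc.shape[i], nhwc.stride[i]);
        // the result is a strided view over the original buffer
    }
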
@@ -80,7 +126,25 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||||
return opr::AxisAddRemove::make(inputs[0], param, config); | return opr::AxisAddRemove::make(inputs[0], param, config); | ||||
} | } | ||||
OP_TRAIT_REG(AddAxis, AddAxis).apply_on_var_node(apply_on_var_node).fallback(); | |||||
SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||||
auto&& op_def = def.cast_final_safe<AddAxis>(); | |||||
size_t nr_inp = inputs.size(); | |||||
mgb_assert(nr_inp == 1, "AddAxis expects 1 inputs; got %lu actually", nr_inp); | |||||
auto&& src = inputs[0]; | |||||
auto tlayout = src->layout(); | |||||
for (auto&& i : op_def.axis) { | |||||
tlayout.add_axis_cont_inplace(i); | |||||
} | |||||
// memory forward | |||||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
} | |||||
OP_TRAIT_REG(AddAxis, AddAxis) | |||||
.apply_on_var_node(apply_on_var_node) | |||||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||||
.fallback(); | |||||
} // namespace add_axis | } // namespace add_axis | ||||
} // namespace | } // namespace | ||||
@@ -97,7 +161,36 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||||
return opr::AxisAddRemove::make(inputs[0], param, config); | return opr::AxisAddRemove::make(inputs[0], param, config); | ||||
} | } | ||||
OP_TRAIT_REG(RemoveAxis, RemoveAxis).apply_on_var_node(apply_on_var_node).fallback(); | |||||
SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||||
auto&& op_def = def.cast_final_safe<RemoveAxis>(); | |||||
size_t nr_inp = inputs.size(); | |||||
mgb_assert(nr_inp == 1, "RemoveAxis expects 1 inputs; got %lu actually", nr_inp); | |||||
auto&& src = inputs[0]; | |||||
auto tlayout = src->layout(); | |||||
for (auto&& i : op_def.axis) { | |||||
if (tlayout.ndim == 1) { | |||||
mgb_assert( | |||||
tlayout.shape[0] == 1 && i == 0, | |||||
"can not remove axis %u from tensor of shape=%s", i, | |||||
tlayout.megdnn::TensorShape::to_string().c_str()); | |||||
} else { | |||||
mgb_assert( | |||||
i < tlayout.ndim && tlayout.shape[i] == 1, | |||||
"can not remove axis %u from tensor of shape=%s", i, | |||||
tlayout.megdnn::TensorShape::to_string().c_str()); | |||||
tlayout.remove_axis_inplace(i); | |||||
} | |||||
} | |||||
// memory forward | |||||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||||
} | |||||
OP_TRAIT_REG(RemoveAxis, RemoveAxis) | |||||
.apply_on_var_node(apply_on_var_node) | |||||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||||
.fallback(); | |||||
} // namespace remove_axis | } // namespace remove_axis | ||||
} // namespace | } // namespace | ||||
@@ -411,7 +411,7 @@ struct ComputingGraphHolder { | |||||
executable->wait(); | executable->wait(); | ||||
size_t nr_inputs = inputs.size(); | size_t nr_inputs = inputs.size(); | ||||
for (size_t i = 0; i < nr_inputs; ++i) { | for (size_t i = 0; i < nr_inputs; ++i) { | ||||
auto input_dev_tensor = input_tensors[i]->dev_tensor(); | |||||
auto input_dev_tensor = input_tensors[i]->dev_tensor(false); | |||||
inputs[i].device_value->reset( | inputs[i].device_value->reset( | ||||
input_dev_tensor.storage(), input_dev_tensor.layout()); | input_dev_tensor.storage(), input_dev_tensor.layout()); | ||||
if (inputs[i].host_value) { | if (inputs[i].host_value) { | ||||
@@ -95,7 +95,13 @@ const Blob::RawStorage& Blob::storage() { | |||||
Tensor::Tensor( | Tensor::Tensor( | ||||
BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv) | BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv) | ||||
: m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {} | |||||
: m_cn(blob->comp_node()), | |||||
m_shape(layout), | |||||
m_dtype(layout.dtype), | |||||
m_layout(layout), | |||||
m_blob(std::move(blob)), | |||||
m_offset(offset), | |||||
m_value(hv) {} | |||||
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | ||||
constexpr int size_threshold = TensorShape::MAX_NDIM; | constexpr int size_threshold = TensorShape::MAX_NDIM; | ||||
@@ -107,7 +113,12 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | |||||
MGB_RECORD_EVENT( | MGB_RECORD_EVENT( | ||||
profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(), | profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(), | ||||
dev_tensor().raw_ptr()); | dev_tensor().raw_ptr()); | ||||
dev_tensor().copy_from_fixlayout(hv); | |||||
DeviceTensorStorage storage; | |||||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
storage = storage.sub(m_offset); | |||||
DeviceTensorND dv; | |||||
dv.reset(storage, m_layout); | |||||
dv.copy_from_fixlayout(hv); | |||||
// even though hv is saved in m_value, Tensor itself could be | // even though hv is saved in m_value, Tensor itself could be | ||||
// released before copy completes | // released before copy completes | ||||
MGB_RECORD_EVENT( | MGB_RECORD_EVENT( | ||||
@@ -117,25 +128,36 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | |||||
} | } | ||||
} | } | ||||
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) { | |||||
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) | |||||
: m_offset(dv.storage().offset()), | |||||
m_cn(dv.comp_node()), | |||||
m_shape(dv.layout()), | |||||
m_dtype(dv.layout().dtype), | |||||
m_blob(Blob::make(dv.storage())), | |||||
m_layout(dv.layout()) { | |||||
if (!hv.empty()) { | if (!hv.empty()) { | ||||
mgb_assert(dv.comp_node() == hv.comp_node()); | mgb_assert(dv.comp_node() == hv.comp_node()); | ||||
mgb_assert(dv.dtype() == hv.dtype()); | mgb_assert(dv.dtype() == hv.dtype()); | ||||
mgb_assert(dv.shape().eq_shape(hv.shape())); | mgb_assert(dv.shape().eq_shape(hv.shape())); | ||||
m_value = hv; | m_value = hv; | ||||
} | } | ||||
m_layout = dv.layout(); | |||||
m_blob = Blob::make(dv.storage()); | |||||
m_offset = dv.storage().offset(); | |||||
} | } | ||||
Tensor::Tensor(const TensorLayout& layout, const CompNode& cn) | Tensor::Tensor(const TensorLayout& layout, const CompNode& cn) | ||||
: m_layout{layout}, | : m_layout{layout}, | ||||
m_blob{Blob::make(cn, layout.span().dist_byte())}, | m_blob{Blob::make(cn, layout.span().dist_byte())}, | ||||
m_offset{0} {} | |||||
m_offset{0}, | |||||
m_cn(cn), | |||||
m_shape(layout), | |||||
m_dtype(layout.dtype) {} | |||||
Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout) | Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout) | ||||
: m_layout{layout}, m_blob{blob}, m_offset{offset} {} | |||||
: m_layout{layout}, | |||||
m_blob{blob}, | |||||
m_offset{offset}, | |||||
m_cn(blob->comp_node()), | |||||
m_shape(layout), | |||||
m_dtype(layout.dtype) {} | |||||
TensorPtr Tensor::make(const HostTensorND& hv) { | TensorPtr Tensor::make(const HostTensorND& hv) { | ||||
auto&& blob = MultiCNConstTensorCache::inst().lookup(hv); | auto&& blob = MultiCNConstTensorCache::inst().lookup(hv); | ||||
@@ -145,10 +167,45 @@ TensorPtr Tensor::make(const HostTensorND& hv) { | |||||
return std::make_shared<Tensor>(hv); | return std::make_shared<Tensor>(hv); | ||||
} | } | ||||
DeviceTensorND Tensor::dev_tensor() { | |||||
void Tensor::to_contiguous_inplace(VarNode::LayoutConstraintCallback& layout_checker) { | |||||
MGB_LOCK_GUARD(m_blob_mtx); | |||||
if (!m_layout.is_empty() && !layout_checker(m_layout)) { | |||||
DeviceTensorStorage storage; | |||||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
storage = storage.sub(m_offset); | |||||
DeviceTensorND dv; | |||||
dv.reset(storage, m_layout); | |||||
DeviceTensorND dv_contig; | |||||
dv_contig.copy_from(dv); | |||||
m_layout = dv_contig.layout(); | |||||
std::atomic_store(&m_blob, Blob::make(dv_contig.storage())); | |||||
mgb_assert(m_layout.is_contiguous()); | |||||
m_offset = 0; | |||||
} | |||||
} | |||||
void Tensor::to_contiguous_inplace() { | |||||
static VarNode::LayoutConstraintCallback default_cb = | |||||
[](const TensorLayout& layout) { return layout.is_contiguous(); }; | |||||
to_contiguous_inplace(default_cb); | |||||
} | |||||
void Tensor::assign_from_dev_tensor(DeviceTensorND dv) { | |||||
MGB_LOCK_GUARD(m_blob_mtx); | |||||
std::atomic_store(&m_blob, Blob::make(dv.storage())); | |||||
m_offset = dv.storage().offset(); | |||||
m_layout = dv.layout(); | |||||
} | |||||
DeviceTensorND Tensor::dev_tensor(bool contiguous) { | |||||
mgb_assert(m_blob, "uninitialized tensor."); | mgb_assert(m_blob, "uninitialized tensor."); | ||||
if (contiguous) { | |||||
to_contiguous_inplace(); | |||||
} | |||||
MGB_LOCK_GUARD(m_blob_mtx); | |||||
DeviceTensorStorage storage; | DeviceTensorStorage storage; | ||||
storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage()); | |||||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
storage = storage.sub(m_offset); | storage = storage.sub(m_offset); | ||||
DeviceTensorND ret; | DeviceTensorND ret; | ||||
ret.reset(storage, m_layout); | ret.reset(storage, m_layout); | ||||
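
`to_contiguous_inplace(checker)` only relayouts when the consumer's callback rejects the current layout, and `dev_tensor(true)` uses the default contiguity checker. A standalone sketch of that check-then-copy pattern on a strided 2-D view (hypothetical `View2D` and `LayoutChecker` types, not the MegEngine API):

    #include <cstdio>
    #include <functional>
    #include <vector>

    // Hypothetical 2-D strided view: data pointer plus per-axis strides (in elements).
    struct View2D {
        const float* data;
        long rows, cols, row_stride, col_stride;
    };

    // Stand-in for VarNode::LayoutConstraintCallback: true means the consumer
    // can use the view's layout as-is (zero-copy forwarding).
    using LayoutChecker = std::function<bool(const View2D&)>;

    // Gather the view's elements into a freshly packed (row-major) buffer.
    std::vector<float> to_contiguous(const View2D& v) {
        std::vector<float> out(static_cast<size_t>(v.rows * v.cols));
        for (long r = 0; r < v.rows; ++r)
            for (long c = 0; c < v.cols; ++c)
                out[static_cast<size_t>(r * v.cols + c)] =
                        v.data[r * v.row_stride + c * v.col_stride];
        return out;
    }

    int main() {
        // 2x3 contiguous buffer viewed transposed as 3x2, hence non-contiguous.
        std::vector<float> buf = {0, 1, 2, 3, 4, 5};
        View2D view{buf.data(), 3, 2, /*row_stride=*/1, /*col_stride=*/3};
        LayoutChecker acceptable = [](const View2D& v) {
            return v.col_stride == 1 && v.row_stride == v.cols;  // packed row-major
        };
        if (!acceptable(view)) {                   // checker rejects: materialize a copy
            std::vector<float> packed = to_contiguous(view);
            for (float x : packed) std::printf("%g ", x);  // 0 3 1 4 2 5
            std::printf("\n");
        }
        // otherwise the original storage would have been forwarded untouched
    }
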
@@ -156,16 +213,22 @@ DeviceTensorND Tensor::dev_tensor() { | |||||
} | } | ||||
void Tensor::fetch_value() { | void Tensor::fetch_value() { | ||||
MGB_LOCK_GUARD(m_mtx); | |||||
MGB_LOCK_GUARD(m_blob_mtx); | |||||
MGB_LOCK_GUARD(m_value_mtx); | |||||
if (m_value.empty()) { | if (m_value.empty()) { | ||||
m_value.copy_from(dev_tensor()); | |||||
DeviceTensorStorage storage; | |||||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||||
storage = storage.sub(m_offset); | |||||
DeviceTensorND dv; | |||||
dv.reset(storage, m_layout); | |||||
m_value.copy_from(dv); | |||||
m_value_ready.reset(EventPool::without_timer().alloc(comp_node())); | m_value_ready.reset(EventPool::without_timer().alloc(comp_node())); | ||||
m_value_ready->record(); | m_value_ready->record(); | ||||
} | } | ||||
} | } | ||||
bool Tensor::value_fetched() { | bool Tensor::value_fetched() { | ||||
MGB_LOCK_GUARD(m_mtx); | |||||
MGB_LOCK_GUARD(m_value_mtx); | |||||
return m_value.layout().ndim != 0; | return m_value.layout().ndim != 0; | ||||
} | } | ||||
@@ -178,7 +241,7 @@ const HostTensorND& Tensor::get_value() { | |||||
} | } | ||||
const HostTensorND* Tensor::try_get_value() { | const HostTensorND* Tensor::try_get_value() { | ||||
MGB_LOCK_GUARD(m_mtx); | |||||
MGB_LOCK_GUARD(m_value_mtx); | |||||
if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) { | if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) { | ||||
return &m_value; | return &m_value; | ||||
} | } | ||||
@@ -193,7 +256,7 @@ TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) { | |||||
} | } | ||||
TensorPtr Tensor::sub(size_t offset, TensorShape shape) { | TensorPtr Tensor::sub(size_t offset, TensorShape shape) { | ||||
TensorLayout layout(shape, m_layout.dtype); | |||||
TensorLayout layout(shape, m_dtype); | |||||
return Tensor::make(m_blob, offset + m_offset, layout); | return Tensor::make(m_blob, offset + m_offset, layout); | ||||
} | } | ||||
@@ -73,7 +73,7 @@ public: | |||||
static SymbolVar make(ComputingGraph& graph, Tensor& tensor) { | static SymbolVar make(ComputingGraph& graph, Tensor& tensor) { | ||||
auto opr = graph.insert_opr(std::make_unique<InputPlaceholder>(graph, &tensor)); | auto opr = graph.insert_opr(std::make_unique<InputPlaceholder>(graph, &tensor)); | ||||
auto var = opr->output(0); | auto var = opr->output(0); | ||||
auto&& dev_tensor = tensor.dev_tensor(); | |||||
auto&& dev_tensor = tensor.dev_tensor(false); | |||||
var->m_comp_node = dev_tensor.comp_node(); | var->m_comp_node = dev_tensor.comp_node(); | ||||
var->m_shape = dev_tensor.shape(); | var->m_shape = dev_tensor.shape(); | ||||
if (dev_tensor.empty()) { | if (dev_tensor.empty()) { | ||||
@@ -81,10 +81,7 @@ public: | |||||
layout.init_contiguous_stride(); | layout.init_contiguous_stride(); | ||||
dev_tensor.reset(dev_tensor.storage(), layout); | dev_tensor.reset(dev_tensor.storage(), layout); | ||||
} | } | ||||
var->m_dev_tensor = dev_tensor; | |||||
var->m_mem_plan.reset_from_owner_var() | |||||
.chunk() | |||||
.mem_alloc_status.set_from_owner_var(); | |||||
var->force_assign_dev_tensor_from_tensor(dev_tensor); | |||||
return var; | return var; | ||||
} | } | ||||
@@ -314,15 +314,11 @@ public: | |||||
size_t idx = 0; | size_t idx = 0; | ||||
for (auto&& input : opr_inputs) { | for (auto&& input : opr_inputs) { | ||||
mgb_assert(input->owner_opr()->same_type<InputPlaceholder>()); | mgb_assert(input->owner_opr()->same_type<InputPlaceholder>()); | ||||
input->m_dev_tensor.storage({}); | |||||
auto&& dev_tensor = inputs[input_remap[idx]]->dev_tensor(); | |||||
auto&& dev_tensor = inputs[input_remap[idx]]->dev_tensor(false); | |||||
auto&& layout = dev_tensor.layout(); | auto&& layout = dev_tensor.layout(); | ||||
input->shape(dev_tensor.shape()); | input->shape(dev_tensor.shape()); | ||||
auto&& chk = input->m_mem_plan.reset_from_owner_var().chunk(); | |||||
input->m_dev_tensor.reset(dev_tensor.storage(), layout); | |||||
input->m_mem_plan.layout(layout); | |||||
chk.mem_alloc_status.set_from_owner_var(); | |||||
input->force_assign_dev_tensor_from_tensor(dev_tensor); | |||||
mgb_assert(input->comp_node() == dev_tensor.comp_node()); | mgb_assert(input->comp_node() == dev_tensor.comp_node()); | ||||
mgb_assert(input->shape().eq_shape(layout)); | mgb_assert(input->shape().eq_shape(layout)); | ||||
@@ -335,9 +331,14 @@ public: | |||||
mgb_assert(m_opr->usable_output().size() == outputs.size()); | mgb_assert(m_opr->usable_output().size() == outputs.size()); | ||||
::mgb::opr::intl::WorkspaceLimitHook::set_impl( | ::mgb::opr::intl::WorkspaceLimitHook::set_impl( | ||||
m_opr->owner_graph(), get_workspace_limit); | m_opr->owner_graph(), get_workspace_limit); | ||||
size_t j = 0; | |||||
for (auto&& var : m_opr->output()) { | for (auto&& var : m_opr->output()) { | ||||
auto&& chk = var->m_mem_plan.reset_from_owner_var().chunk(); | auto&& chk = var->m_mem_plan.reset_from_owner_var().chunk(); | ||||
chk.mem_alloc_status.set_from_owner_var(); | |||||
} | |||||
m_opr->mem_plan_fwd_in2out_readonly(); | |||||
size_t j = 0; | |||||
for (auto&& var : m_opr->output()) { | |||||
if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) { | if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) { | ||||
TensorLayout layout{var->shape(), var->dtype(), var->format()}; | TensorLayout layout{var->shape(), var->dtype(), var->format()}; | ||||
var->m_dev_tensor = BlobManager::inst()->alloc_workspace_with_defrag( | var->m_dev_tensor = BlobManager::inst()->alloc_workspace_with_defrag( | ||||
@@ -349,18 +350,16 @@ public: | |||||
mgb_assert(var->comp_node() == tensor->comp_node()); | mgb_assert(var->comp_node() == tensor->comp_node()); | ||||
mgb_assert(var->shape().eq_shape(layout)); | mgb_assert(var->shape().eq_shape(layout)); | ||||
mgb_assert(var->dtype() == layout.dtype); | mgb_assert(var->dtype() == layout.dtype); | ||||
var->assign_dev_tensor_from_tensor(tensor->dev_tensor()); | |||||
if (var->m_mem_plan.chunk().owner_var != var) { | |||||
tensor->assign_from_dev_tensor( | |||||
var->m_dev_tensor); // memory forwarding | |||||
} else { | |||||
var->assign_dev_tensor_from_tensor(tensor->dev_tensor()); | |||||
} | |||||
++j; | ++j; | ||||
} | } | ||||
chk.mem_alloc_status.set_from_owner_var(); | |||||
} | } | ||||
mgb_assert(j == outputs.size()); | mgb_assert(j == outputs.size()); | ||||
// Memory forwarding was bypassed in megbrain with the graph option | |||||
// imperative_proxy_graph on; here we call mem_plan_fwd_in2out_readonly | |||||
// to initialize some oprs' (e.g. Subtensor) internal state | |||||
// TODO: implement memory forwarding | |||||
m_opr->mem_plan_fwd_in2out_readonly(); | |||||
{ | { | ||||
// some oprs (e.g. Reduce) rely on on_mem_status_changed to set | // some oprs (e.g. Reduce) rely on on_mem_status_changed to set | ||||
// input/output tensors correctly, since we bypass var_node_mem_mgr | // input/output tensors correctly, since we bypass var_node_mem_mgr | ||||
@@ -840,7 +839,7 @@ public: | |||||
Tensor::make(output_descs[i].layout, output_descs[i].comp_node); | Tensor::make(output_descs[i].layout, output_descs[i].comp_node); | ||||
} | } | ||||
auto raw_outputs = to_raw_ptr_array(outputs); | |||||
auto raw_outputs = to_raw_ptr_array(outputs, false); | |||||
CompNode::UnorderedSet used_cns; | CompNode::UnorderedSet used_cns; | ||||
for (auto&& out : raw_outputs) { | for (auto&& out : raw_outputs) { | ||||
auto cn = out->comp_node(); | auto cn = out->comp_node(); | ||||
@@ -9,8 +9,12 @@ | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
*/ | */ | ||||
#include "../mgb_cg_impl.h" | |||||
#include "./mini_graph.h" | #include "./mini_graph.h" | ||||
#include "megbrain/opr/io.h" | |||||
using LayoutConstraintLevel = mgb::cg::VarNodeMemManager::LayoutConstraintLevel; | |||||
using LayoutConstraintCallback = mgb::VarNode::LayoutConstraintCallback; | |||||
namespace mgb::imperative::proxy_graph { | namespace mgb::imperative::proxy_graph { | ||||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder); | ||||
@@ -34,4 +38,81 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
return ret; | return ret; | ||||
} | } | ||||
std::unordered_map<size_t, SmallVector<LayoutConstraintCallback>> | |||||
input_layout_constraints_cache; | |||||
SmallVector<LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
auto get_input_layout_constraint_hash_key = | |||||
[](const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
XXHash state; | |||||
size_t length = 0, data[1 + inputs.size()]; | |||||
data[length++] = def.hash(); | |||||
for (auto&& i : inputs) { | |||||
data[length++] = mgb::hash(i->comp_node()); | |||||
} | |||||
state.update(data, length * sizeof(size_t)); | |||||
return state.digest(); | |||||
}; | |||||
auto hash_key = get_input_layout_constraint_hash_key(def, inputs); | |||||
auto&& iter = input_layout_constraints_cache.find(hash_key); | |||||
if (iter != input_layout_constraints_cache.end()) { | |||||
return iter->second; | |||||
} | |||||
static cg::ComputingGraphImpl* graph = | |||||
imperative::ResourceManager::create_global<cg::ComputingGraphImpl>(); | |||||
VarNodeArray vinputs(inputs.size()); | |||||
for (size_t i = 0; i < inputs.size(); ++i) { | |||||
OperatorNodeConfig config; | |||||
auto&& layout = inputs[i]->layout(); | |||||
layout.init_contiguous_stride(); | |||||
vinputs[i] = graph->insert_opr(std::make_unique<mgb::opr::SharedDeviceTensor>( | |||||
*graph, | |||||
std::make_shared<DeviceTensorND>( | |||||
inputs[i]->comp_node(), layout), | |||||
false, config)) | |||||
->output(0); | |||||
} | |||||
auto&& opr = OpDef::apply_on_var_node(def, vinputs)[0]->owner_opr(); | |||||
opr->add_input_layout_constraint(); | |||||
SmallVector<LayoutConstraintCallback> res(inputs.size()); | |||||
auto& mem_mgr = graph->var_node_mem_manager(); | |||||
for (size_t i = 0; i < vinputs.size(); ++i) { | |||||
auto& trait = mem_mgr.get_var_node_mem_trait(vinputs[i]); | |||||
switch (trait.layout_constraint.level) { | |||||
case LayoutConstraintLevel::CONTIG: | |||||
res[i] = [](const TensorLayout& layout) { | |||||
return layout.is_contiguous(); | |||||
}; | |||||
break; | |||||
case LayoutConstraintLevel::MONOTONE: | |||||
res[i] = [&trait](const TensorLayout& layout) { | |||||
if (!layout.is_abs_monotonous_allow_brdcst()) { | |||||
return false; | |||||
} | |||||
for (auto&& i : trait.layout_constraint.custom) | |||||
if (!i(layout)) | |||||
return false; | |||||
return true; | |||||
}; | |||||
break; | |||||
case LayoutConstraintLevel::NONE: | |||||
if (!trait.layout_constraint.custom.empty()) { | |||||
res[i] = [&trait](const TensorLayout& layout) { | |||||
for (auto&& i : trait.layout_constraint.custom) | |||||
if (!i(layout)) | |||||
return false; | |||||
return true; | |||||
}; | |||||
} | |||||
break; | |||||
default: | |||||
mgb_throw(InternalError, "invalid layout_constraint_level"); | |||||
} | |||||
} | |||||
input_layout_constraints_cache.emplace(hash_key, res); | |||||
return res; | |||||
} | |||||
} // namespace mgb::imperative::proxy_graph_detail | } // namespace mgb::imperative::proxy_graph_detail |
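
The function above builds a throwaway graph node per input to query each operator's layout constraints, then memoizes the result keyed by a hash of the OpDef and the input comp nodes. A standalone sketch of that memoization pattern (std::hash and a boost-style combine stand in for XXHash; all names are hypothetical):

    #include <cstdio>
    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // The cached value is just a string per input here; the real cache stores
    // one LayoutConstraintCallback per input.
    using Constraints = std::vector<std::string>;
    std::unordered_map<size_t, Constraints> g_cache;

    size_t combine(size_t seed, size_t v) {
        // boost-style hash combine; the real code feeds all words into XXHash instead
        return seed ^ (v + 0x9e3779b97f4a7c15ull + (seed << 6) + (seed >> 2));
    }

    Constraints get_constraints(size_t op_hash, const std::vector<std::string>& comp_nodes) {
        size_t key = op_hash;
        for (const auto& cn : comp_nodes)
            key = combine(key, std::hash<std::string>{}(cn));
        if (auto it = g_cache.find(key); it != g_cache.end())
            return it->second;                 // cache hit: skip graph construction
        std::puts("cache miss: deriving constraints (expensive path)");
        Constraints derived(comp_nodes.size(), "contiguous");
        g_cache.emplace(key, derived);
        return derived;
    }

    int main() {
        get_constraints(42, {"xpu0", "xpu0"});  // miss
        get_constraints(42, {"xpu0", "xpu0"});  // hit
        get_constraints(42, {"xpu1", "xpu0"});  // different comp node -> miss
    }
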
@@ -17,6 +17,8 @@ | |||||
#include "./op_trait.h" | #include "./op_trait.h" | ||||
using LayoutConstraintCallback = mgb::VarNode::LayoutConstraintCallback; | |||||
namespace mgb { | namespace mgb { | ||||
namespace imperative { | namespace imperative { | ||||
namespace subgraph_detail { | namespace subgraph_detail { | ||||
@@ -73,6 +75,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
const std::shared_ptr<OpDef>& op, | const std::shared_ptr<OpDef>& op, | ||||
const SmallVector<TensorPtr>& inputs, | const SmallVector<TensorPtr>& inputs, | ||||
size_t nr_outputs) { | size_t nr_outputs) { | ||||
auto&& constraints = OpDef::get_input_layout_constraint(*op, inputs); | |||||
for (size_t idx = 0; idx < inputs.size(); ++idx) { | |||||
auto&& layout_checker = constraints[idx]; | |||||
if (layout_checker) { | |||||
inputs[idx]->to_contiguous_inplace(layout_checker); | |||||
} | |||||
} | |||||
// do not use infered output_desc in subgraph | // do not use infered output_desc in subgraph | ||||
return OpDef::apply_on_physical_tensor(*op, inputs, output_descs, false); | return OpDef::apply_on_physical_tensor(*op, inputs, output_descs, false); | ||||
}; | }; | ||||
@@ -81,6 +90,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
return outputs; | return outputs; | ||||
} | } | ||||
SmallVector<LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||||
SmallVector<LayoutConstraintCallback> res(inputs.size()); | |||||
return res; | |||||
} | |||||
static EncodedSubgraph make_backward_graph_from_forward( | static EncodedSubgraph make_backward_graph_from_forward( | ||||
const SmallVector<LogicalTensorDesc>& inputs, | const SmallVector<LogicalTensorDesc>& inputs, | ||||
const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
@@ -78,6 +78,9 @@ public: | |||||
static EncodedSubgraph make_forward_graph( | static EncodedSubgraph make_forward_graph( | ||||
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs); | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs); | ||||
static SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||||
const OpTrait* trait() const; | const OpTrait* trait() const; | ||||
std::string to_string() const; | std::string to_string() const; | ||||
@@ -14,6 +14,7 @@ | |||||
#include <memory> | #include <memory> | ||||
#include <mutex> | #include <mutex> | ||||
#include "megbrain/graph.h" | |||||
#include "megbrain/imperative/resource_manager.h" | #include "megbrain/imperative/resource_manager.h" | ||||
#include "megbrain/tensor.h" | #include "megbrain/tensor.h" | ||||
@@ -90,18 +91,24 @@ public: | |||||
CompNode comp_node() const { | CompNode comp_node() const { | ||||
mgb_assert(m_blob, "uninitialized tensor."); | mgb_assert(m_blob, "uninitialized tensor."); | ||||
return m_blob->comp_node(); | |||||
return m_cn; | |||||
} | } | ||||
DType dtype() const { return m_layout.dtype; } | |||||
DType dtype() const { return m_dtype; } | |||||
TensorLayout layout() const { return m_layout; } | TensorLayout layout() const { return m_layout; } | ||||
const TensorShape& shape() const { return m_layout; } | |||||
const TensorShape& shape() const { return m_shape; } | |||||
size_t offset() const { return m_offset; } | size_t offset() const { return m_offset; } | ||||
DeviceTensorND dev_tensor(); | |||||
void to_contiguous_inplace(VarNode::LayoutConstraintCallback&); | |||||
void to_contiguous_inplace(); | |||||
DeviceTensorND dev_tensor(bool contiguous = true); | |||||
void assign_from_dev_tensor(DeviceTensorND); | |||||
static TensorPtr make_scalar(DTypeScalar value, CompNode cn); | static TensorPtr make_scalar(DTypeScalar value, CompNode cn); | ||||
@@ -110,7 +117,7 @@ public: | |||||
return make_scalar(value, m_blob->comp_node()); | return make_scalar(value, m_blob->comp_node()); | ||||
} | } | ||||
BlobPtr& blob() { return m_blob; } | |||||
BlobPtr blob() { return m_blob; } | |||||
void fetch_value(); | void fetch_value(); | ||||
bool value_fetched(); | bool value_fetched(); | ||||
@@ -131,10 +138,16 @@ public: | |||||
static void static_initialize(); | static void static_initialize(); | ||||
private: | private: | ||||
TensorLayout m_layout; | |||||
BlobPtr m_blob; | |||||
size_t m_offset; | size_t m_offset; | ||||
std::mutex m_mtx; | |||||
const CompNode m_cn; | |||||
const TensorShape m_shape; | |||||
const DType m_dtype; | |||||
std::mutex m_blob_mtx; | |||||
BlobPtr m_blob; | |||||
TensorLayout m_layout; | |||||
std::mutex m_value_mtx; | |||||
HostTensorND m_value; | HostTensorND m_value; | ||||
EventPtr m_value_ready = nullptr; | EventPtr m_value_ready = nullptr; | ||||
}; | }; | ||||
@@ -33,6 +33,9 @@ EncodedSubgraph make_backward_graph( | |||||
const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
const SmallVector<bool>& output_has_grad); | const SmallVector<bool>& output_has_grad); | ||||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||||
} // namespace proxy_graph_detail | } // namespace proxy_graph_detail | ||||
} // namespace imperative | } // namespace imperative | ||||
} // namespace mgb | } // namespace mgb | ||||
@@ -36,6 +36,9 @@ EncodedSubgraph make_backward_graph( | |||||
const SmallVector<bool>& input_requires_grad, | const SmallVector<bool>& input_requires_grad, | ||||
const SmallVector<bool>& output_has_grad); | const SmallVector<bool>& output_has_grad); | ||||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||||
const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||||
} // namespace subgraph_detail | } // namespace subgraph_detail | ||||
} // namespace imperative | } // namespace imperative | ||||
} // namespace mgb | } // namespace mgb |
@@ -322,7 +322,7 @@ void ComputingGraphImpl::free_varnode_storage(void* ptr) { | |||||
m_var_node_pool.free_raw(ptr); | m_var_node_pool.free_raw(ptr); | ||||
}; | }; | ||||
OperatorNodeBase* ComputingGraphImpl::insert_opr( | |||||
MGE_WIN_DECLSPEC_FUC OperatorNodeBase* ComputingGraphImpl::insert_opr( | |||||
std::unique_ptr<OperatorNodeBase> opr_uniqp) { | std::unique_ptr<OperatorNodeBase> opr_uniqp) { | ||||
auto opr = opr_uniqp.get(); | auto opr = opr_uniqp.get(); | ||||
@@ -148,8 +148,8 @@ class ComputingGraphImpl final : public ComputingGraph { | |||||
public: | public: | ||||
class ComputingSequence; | class ComputingSequence; | ||||
ComputingGraphImpl(); | |||||
~ComputingGraphImpl(); | |||||
MGE_WIN_DECLSPEC_FUC ComputingGraphImpl(); | |||||
MGE_WIN_DECLSPEC_FUC ~ComputingGraphImpl(); | |||||
template <typename T> | template <typename T> | ||||
static ComputingGraphImpl* downcast(T* ptr) = delete; | static ComputingGraphImpl* downcast(T* ptr) = delete; | ||||
@@ -166,7 +166,8 @@ public: | |||||
SmallVector<std::unique_ptr<AsyncExecutable>> compile_multi_part( | SmallVector<std::unique_ptr<AsyncExecutable>> compile_multi_part( | ||||
const SmallVector<OutputSpec>& out_specs) override; | const SmallVector<OutputSpec>& out_specs) override; | ||||
OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr) override; | |||||
MGE_WIN_DECLSPEC_FUC OperatorNodeBase* insert_opr( | |||||
std::unique_ptr<OperatorNodeBase> opr) override; | |||||
void* alloc_varnode_storage() override; | void* alloc_varnode_storage() override; | ||||
@@ -93,6 +93,23 @@ MemAllocPlan& MemAllocPlan::assign_for_forward( | |||||
return *this; | return *this; | ||||
} | } | ||||
MemAllocPlan& MemAllocPlan::force_assign_for_forward( | |||||
const MemAllocPlan& src, const SubTensorSpec& sub) { | |||||
mgb_assert(valid() && src.valid() && m_layout.eq_shape(sub.layout())); | |||||
++(m_chunk = src.m_chunk)->m_refcnt; | |||||
m_layout = sub.layout(); | |||||
// make layout strong-contig | |||||
for (int i = static_cast<int>(m_layout.ndim) - 1; i >= 0; --i) { | |||||
if (m_layout.shape[i] == 1) { | |||||
m_layout.stride[i] = i + 1 < static_cast<int>(m_layout.ndim) | |||||
? m_layout.stride[i + 1] * m_layout.shape[i + 1] | |||||
: 1; | |||||
} | |||||
} | |||||
m_layout.dtype = dtype(); | |||||
return *this; | |||||
} | |||||
MemAllocPlan& MemAllocPlan::reset_from_owner_var() { | MemAllocPlan& MemAllocPlan::reset_from_owner_var() { | ||||
auto owner_var = m_chunk_storage.owner_var; | auto owner_var = m_chunk_storage.owner_var; | ||||
m_layout.dtype = dtype(); | m_layout.dtype = dtype(); | ||||
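
As read from the loop above, `force_assign_for_forward` rewrites the stride of every size-1 axis to the packed value `stride[i+1] * shape[i+1]` (and 1 for the innermost axis), so that size-1 axes carry the stride they would have in a tightly packed layout, which appears to be what the "strong-contig" comment refers to. A minimal reproduction with plain vectors (hypothetical names, not MegEngine code):

    #include <cstdio>
    #include <vector>

    // Size-1 axes get the stride they would have in a tightly packed layout,
    // i.e. stride[i] = stride[i+1] * shape[i+1], and 1 for the innermost axis.
    void make_strong_contig(const std::vector<long>& shape, std::vector<long>& stride) {
        int ndim = static_cast<int>(shape.size());
        for (int i = ndim - 1; i >= 0; --i) {
            if (shape[i] == 1)
                stride[i] = (i + 1 < ndim) ? stride[i + 1] * shape[i + 1] : 1;
        }
    }

    int main() {
        // A (2, 1, 3) layout whose middle size-1 axis carries an arbitrary stride
        // (common after broadcasting or axis insertion).
        std::vector<long> shape = {2, 1, 3};
        std::vector<long> stride = {3, 100, 1};
        make_strong_contig(shape, stride);
        for (size_t i = 0; i < shape.size(); ++i)
            std::printf("axis %zu: shape=%ld stride=%ld\n", i, shape[i], stride[i]);
        // the middle stride becomes 3, matching a packed (2, 1, 3) layout
    }
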
@@ -223,7 +240,12 @@ VarNode& VarNode::format(TensorFormat format) { | |||||
bool VarNode::set_fwd_in2out_readonly(VarNode* input, const SubTensorSpec& sub) { | bool VarNode::set_fwd_in2out_readonly(VarNode* input, const SubTensorSpec& sub) { | ||||
if (owner_graph()->options().imperative_proxy_graph) { | if (owner_graph()->options().imperative_proxy_graph) { | ||||
return false; | |||||
if (input->comp_node() != comp_node()) { | |||||
return false; | |||||
} | |||||
m_mem_plan.force_assign_for_forward(input->m_mem_plan, sub); | |||||
m_dev_tensor = input->dev_tensor().sub(sub); | |||||
return true; | |||||
} | } | ||||
return ComputingGraphImpl::downcast(owner_graph()) | return ComputingGraphImpl::downcast(owner_graph()) | ||||
->var_node_mem_manager() | ->var_node_mem_manager() | ||||
@@ -361,6 +383,13 @@ VarNode& VarNode::reset_dev_tensor_from_tensor(const DeviceTensorND& value) { | |||||
return *this; | return *this; | ||||
} | } | ||||
void VarNode::force_assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | |||||
m_dev_tensor = value; | |||||
shape(value.shape()); | |||||
m_mem_plan.reset_from_owner_var().chunk().mem_alloc_status.set_from_owner_var(); | |||||
m_mem_plan.layout(value.layout()); | |||||
} | |||||
void VarNode::assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | void VarNode::assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | ||||
mgb_assert( | mgb_assert( | ||||
(value.layout().is_contiguous() || value.empty()) && | (value.layout().is_contiguous() || value.empty()) && | ||||
@@ -475,7 +475,7 @@ DEF(CompNode node, const TensorShape& shape, DType dtype, TensorFormat format) | |||||
DEF(CompNode node, const TensorLayout& layout) | DEF(CompNode node, const TensorLayout& layout) | ||||
: TensorND(node, layout, layout.dtype, layout.format) { | : TensorND(node, layout, layout.dtype, layout.format) { | ||||
mgb_assert( | mgb_assert( | ||||
layout.is_contiguous(), | |||||
layout.is_contiguous() || layout.is_empty(), | |||||
"non-contiguous layout used for initializing a tensor: %s", | "non-contiguous layout used for initializing a tensor: %s", | ||||
layout.to_string().c_str()); | layout.to_string().c_str()); | ||||
} | } | ||||
@@ -241,7 +241,8 @@ public: | |||||
* \return the node in the graph (maybe another node due to | * \return the node in the graph (maybe another node due to | ||||
* deduplication) | * deduplication) | ||||
*/ | */ | ||||
virtual OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr) = 0; | |||||
MGE_WIN_DECLSPEC_FUC virtual OperatorNodeBase* insert_opr( | |||||
std::unique_ptr<OperatorNodeBase> opr) = 0; | |||||
/*! | /*! | ||||
* \brief used by OperatorNodeBase to allocate its outputs | * \brief used by OperatorNodeBase to allocate its outputs | ||||
@@ -194,6 +194,10 @@ public: | |||||
MGE_WIN_DECLSPEC_FUC MemAllocPlan& assign_for_forward( | MGE_WIN_DECLSPEC_FUC MemAllocPlan& assign_for_forward( | ||||
const MemAllocPlan& src, const SubTensorSpec& sub); | const MemAllocPlan& src, const SubTensorSpec& sub); | ||||
//! force assign for readonly forward | |||||
MGE_WIN_DECLSPEC_FUC MemAllocPlan& force_assign_for_forward( | |||||
const MemAllocPlan& src, const SubTensorSpec& sub); | |||||
/*! | /*! | ||||
* \brief next readonly-forward reader of this MemAllocPlan | * \brief next readonly-forward reader of this MemAllocPlan | ||||
* | * | ||||
@@ -509,6 +513,9 @@ public: | |||||
//! NO_SYS_MEM_ALLOC can be modified. | //! NO_SYS_MEM_ALLOC can be modified. | ||||
MGE_WIN_DECLSPEC_FUC bool is_graph_dest_varnode(); | MGE_WIN_DECLSPEC_FUC bool is_graph_dest_varnode(); | ||||
MGE_WIN_DECLSPEC_FUC void force_assign_dev_tensor_from_tensor( | |||||
const DeviceTensorND& value); | |||||
private: | private: | ||||
//! whether its memory should be allocated by mgb system during graph | //! whether its memory should be allocated by mgb system during graph | ||||
//! execution; initialized in VarNodeMemManager::reset_opr_seq() | //! execution; initialized in VarNodeMemManager::reset_opr_seq() | ||||
@@ -24,7 +24,7 @@ namespace intl { | |||||
* \brief base class for IO nodes between device and host | * \brief base class for IO nodes between device and host | ||||
*/ | */ | ||||
class HostIONodeBase : public cg::SingleCNOperatorNodeBase { | class HostIONodeBase : public cg::SingleCNOperatorNodeBase { | ||||
void init_output_static_infer_desc() override final; | |||||
MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override final; | |||||
protected: | protected: | ||||
using cg::SingleCNOperatorNodeBase::SingleCNOperatorNodeBase; | using cg::SingleCNOperatorNodeBase::SingleCNOperatorNodeBase; | ||||
@@ -32,9 +32,10 @@ protected: | |||||
/*! | /*! | ||||
* \brief src_type for static shape and value infer | * \brief src_type for static shape and value infer | ||||
*/ | */ | ||||
virtual cg::static_infer::SourceType static_infer_src_type() const; | |||||
MGE_WIN_DECLSPEC_FUC virtual cg::static_infer::SourceType static_infer_src_type() | |||||
const; | |||||
virtual const TensorShape& get_output_shape() = 0; | |||||
MGE_WIN_DECLSPEC_FUC virtual const TensorShape& get_output_shape() = 0; | |||||
/*! | /*! | ||||
* \brief fill value in *dest* for static inference | * \brief fill value in *dest* for static inference | ||||
@@ -52,10 +53,10 @@ protected: | |||||
class DeviceTensorHolder : public HostIONodeBase { | class DeviceTensorHolder : public HostIONodeBase { | ||||
class DevValueExecDep; | class DevValueExecDep; | ||||
void init_output_format() override; | |||||
void init_output_mem_plan(bool dynamic) override final; | |||||
void scn_do_execute() override final; | |||||
void record_execute_deps(ExecDependencyArray& deps) override; | |||||
MGE_WIN_DECLSPEC_FUC void init_output_format() override; | |||||
MGE_WIN_DECLSPEC_FUC void init_output_mem_plan(bool dynamic) override final; | |||||
MGE_WIN_DECLSPEC_FUC void scn_do_execute() override final; | |||||
MGE_WIN_DECLSPEC_FUC void record_execute_deps(ExecDependencyArray& deps) override; | |||||
protected: | protected: | ||||
using HostIONodeBase::HostIONodeBase; | using HostIONodeBase::HostIONodeBase; | ||||
@@ -77,20 +78,20 @@ MGB_DEFINE_CLS_WITH_SUPER(SharedDeviceTensorBase, DeviceTensorHolder) // { | |||||
std::shared_ptr<DeviceTensorND> m_dev_data; | std::shared_ptr<DeviceTensorND> m_dev_data; | ||||
bool m_const_value; | bool m_const_value; | ||||
const TensorShape& get_output_shape() override; | |||||
MGE_WIN_DECLSPEC_FUC const TensorShape& get_output_shape() override; | |||||
bool fill_in_static_infer(DeviceTensorND* dest) override { | bool fill_in_static_infer(DeviceTensorND* dest) override { | ||||
MGB_MARK_USED_VAR(dest); | MGB_MARK_USED_VAR(dest); | ||||
return false; | return false; | ||||
} | } | ||||
void init_output_comp_node() override; | |||||
MGE_WIN_DECLSPEC_FUC void init_output_comp_node() override; | |||||
public: | public: | ||||
//! const_value marks whether the device value of this operator should | //! const_value marks whether the device value of this operator should | ||||
//! be treated as constant during graph execution. Should be false in | //! be treated as constant during graph execution. Should be false in | ||||
//! most cases. | //! most cases. | ||||
SharedDeviceTensorBase( | |||||
MGE_WIN_DECLSPEC_FUC SharedDeviceTensorBase( | |||||
ComputingGraph& graph, const std::shared_ptr<DeviceTensorND>& dev_data, | ComputingGraph& graph, const std::shared_ptr<DeviceTensorND>& dev_data, | ||||
bool const_value, const OperatorNodeConfig& config); | bool const_value, const OperatorNodeConfig& config); | ||||
@@ -248,7 +249,8 @@ private: | |||||
*/ | */ | ||||
MGB_DEFINE_OPR_CLASS_WITH_EXPORT( | MGB_DEFINE_OPR_CLASS_WITH_EXPORT( | ||||
SharedDeviceTensor, intl::SharedDeviceTensorBase) // { | SharedDeviceTensor, intl::SharedDeviceTensorBase) // { | ||||
cg::static_infer::SourceType static_infer_src_type() const override; | |||||
MGE_WIN_DECLSPEC_FUC cg::static_infer::SourceType static_infer_src_type() | |||||
const override; | |||||
public: | public: | ||||
using Super::Super; | using Super::Super; | ||||