GitOrigin-RevId: 7c1993979c
tags/v1.9.0
@@ -285,7 +285,8 @@ struct TensorLayout : public TensorShape { | |||
* stride | |||
*/ | |||
void add_axis_cont_inplace(size_t axis) { | |||
add_axis_inplace(axis, 1, stride[axis] * shape[axis]); | |||
ptrdiff_t stride_ = axis < ndim ? stride[axis] * shape[axis] : 1; | |||
add_axis_inplace(axis, 1, stride_); | |||
} | |||
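The guard matters when a trailing axis is appended, i.e. axis == ndim. A minimal sketch, assuming the usual TensorLayout(shape, dtype) constructor:

TensorLayout layout({4, 8}, dtype::Float32());   // stride {8, 1}
layout.add_axis_cont_inplace(2);                 // axis == ndim: append a trailing axis
// The old code read the uninitialized stride[2] / shape[2] slots here; with
// the guard, the appended axis simply gets stride 1 (shape {4, 8, 1},
// stride {8, 1, 1}).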
/*! | |||
@@ -382,7 +382,7 @@ bool TensorLayout::eq_layout(const TensorLayout& rhs) const { | |||
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); | |||
auto ax = [](size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) { | |||
return (shape0 == shape1) & ((shape0 == 1) | (stride0 == stride1)); | |||
return (shape0 == shape1) & ((shape0 <= 1) | (stride0 == stride1)); | |||
}; | |||
if (ndim == rhs.ndim) { | |||
size_t eq = 0; | |||
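The relaxed ax check above treats zero-length axes like length-1 axes: a stride on an empty axis carries no information, so layouts that differ only there now compare equal. A small sketch of the newly accepted case:

TensorLayout a({0, 4}, dtype::Float32());  // contiguous: stride {4, 1}
TensorLayout b = a;
b.stride[0] = 128;                         // arbitrary stride on the empty axis
// a.eq_layout(b) was false with the old shape0 == 1 test and is true now.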
@@ -13,7 +13,8 @@ | |||
using namespace megdnn; | |||
const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle(int debug_level) { | |||
MGE_WIN_DECLSPEC_FUC const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle( | |||
int debug_level) { | |||
auto make = [](int deb_level) { | |||
megcoreDeviceHandle_t dev_handle; | |||
megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU); | |||
@@ -32,6 +32,7 @@ | |||
#include "./module_trace.h" | |||
#include "./numpy_dtypes.h" | |||
#include "./tensor.h" | |||
#include "./tensor_utils.h" | |||
#include "./transformation.h" | |||
#include <object.h> | |||
@@ -549,557 +550,6 @@ CompNode _get_device(PyObject* const* args, size_t nargs) { | |||
return cn; | |||
} | |||
bool is_scalar(PyObject* tensor) { | |||
if (py::isinstance<PySymbolVar>(py::handle(tensor))) { | |||
auto var = py::handle(tensor).cast<PySymbolVar*>(); | |||
return var->is_scalar; | |||
} | |||
auto* tw = TensorWrapper::try_cast(tensor); | |||
if (tw) { | |||
return tw->m_tensor->is_scalar(); | |||
} | |||
return PyArray_CheckAnyScalar(tensor); | |||
} | |||
bool is_bool_list(PyObject* arg) { | |||
if (!PyList_Check(arg)) { | |||
return false; | |||
} | |||
size_t sz = PyList_Size(arg); | |||
if (!sz) { | |||
return false; | |||
} | |||
for (size_t i = 0; i < sz; ++i) { | |||
PyObject* handle = PyList_GetItem(arg, i); | |||
if (!PyBool_Check(handle)) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
bool is_bool_dtype(PyObject* args) { | |||
if (!PyObject_HasAttrString(args, "dtype")) | |||
return false; | |||
PyObject* dobj = PyObject_GetAttrString(args, "dtype"); | |||
PyArray_Descr* dtype; | |||
PyArray_DescrConverter(dobj, &dtype); | |||
bool ret = (dtype->kind == 'b'); | |||
Py_XDECREF(dtype); | |||
Py_XDECREF(dobj); | |||
return ret; | |||
} | |||
py::object _Const( | |||
py::handle value, py::handle dtype, py::handle device, py::handle ref) { | |||
py::object val = py::reinterpret_borrow<py::object>(value); | |||
if (PyArray_Check(value.ptr())) { | |||
py::tuple strides = | |||
py::reinterpret_borrow<py::tuple>(getattr(value, "strides")); | |||
bool need_squeeze = false; | |||
for (size_t i = 0; i < strides.size(); ++i) { | |||
if (strides[i].cast<ptrdiff_t>() == 0) { | |||
need_squeeze = true; | |||
} | |||
} | |||
if (need_squeeze) { | |||
val = py::reinterpret_borrow<py::array>(value); | |||
val = val.attr("squeeze")(); | |||
val = val.attr("reshape")(val.attr("shape")); | |||
} | |||
} | |||
if (py::isinstance<PySymbolVar>(ref)) { | |||
auto ref_var = ref.cast<PySymbolVar*>(); | |||
auto* graph = ref_var->m_node->owner_graph(); | |||
auto cn = device.cast<CompNode>(); | |||
OperatorNodeConfig config(cn); | |||
auto hv = npy::np2tensor( | |||
val.ptr(), npy::Meth::borrow(cn), dtype.cast<mgb::DType>()); | |||
auto typeobj = ref.get_type(); | |||
return typeobj(opr::ImmutableTensor::make(*graph, hv, config).node()); | |||
} | |||
py::tuple tup = py::make_tuple(val, dtype, device, true, false, py::none()); | |||
return TensorWrapper::make(py_tensor_type, tup.ptr(), nullptr); | |||
} | |||
py::tuple _make_shape_tuple(py::handle shape) { | |||
py::list orig; | |||
py::list ret(0); | |||
auto solve_one = [&](py::handle val) { | |||
if (TensorWrapper::try_cast(val.ptr()) || py::isinstance<PySymbolVar>(val)) { | |||
py::object np = getattr(val, "numpy")(); | |||
PyArrayObject* arr = (PyArrayObject*)np.ptr(); | |||
PyObject* maybe_list = PyArray_ToList(arr); | |||
if (PyList_Check(maybe_list)) { | |||
py::list may = py::reinterpret_steal<py::list>(maybe_list); | |||
for (size_t i = 0; i < may.size(); ++i) { | |||
ret.append(may[i]); | |||
} | |||
} else { | |||
mgb_assert(PyLong_Check(maybe_list)); | |||
ret.append(PyLong_AsLong(maybe_list)); | |||
Py_XDECREF(maybe_list); | |||
} | |||
} else if (PyArray_Check(val.ptr())) { | |||
ret.append(PyArray_PyIntAsInt(val.ptr())); | |||
} else { | |||
ret.append(PyLong_AsLong(val.ptr())); | |||
} | |||
}; | |||
if (PyArray_Check(shape.ptr()) && !PyArray_CheckAnyScalar(shape.ptr())) { | |||
orig = py::reinterpret_steal<py::list>( | |||
PyArray_ToList((PyArrayObject*)shape.ptr())); | |||
for (size_t i = 0; i < orig.size(); ++i) { | |||
solve_one(orig[i]); | |||
} | |||
} else if (PyList_Check(shape.ptr())) { | |||
orig = py::reinterpret_borrow<py::list>(shape); | |||
for (size_t i = 0; i < orig.size(); ++i) { | |||
solve_one(orig[i]); | |||
} | |||
} else if (PyTuple_Check(shape.ptr())) { | |||
py::tuple tup = py::reinterpret_borrow<py::tuple>(shape); | |||
for (size_t i = 0; i < tup.size(); ++i) { | |||
solve_one(tup[i]); | |||
} | |||
} else { | |||
solve_one(shape); | |||
} | |||
return py::reinterpret_steal<py::tuple>(PyList_AsTuple(ret.ptr())); | |||
} | |||
py::object _get_index(py::object tensor, py::object src) { | |||
if (!TensorWrapper::try_cast(tensor.ptr()) && | |||
!py::isinstance<PySymbolVar>(tensor)) { | |||
auto get_const = [&](mgb::DType dtype) -> py::object { | |||
return _Const(tensor, py::cast(dtype), src.attr("device"), src); | |||
}; | |||
if (is_bool_list(tensor.ptr()) || is_bool_dtype(tensor.ptr())) { | |||
tensor = get_const(dtype::Bool()); | |||
} else { | |||
tensor = get_const(dtype::Int32()); | |||
} | |||
if (!is_bool_dtype(tensor.ptr())) { | |||
return tensor; | |||
} | |||
} else { | |||
if (!is_bool_dtype(tensor.ptr())) { | |||
return tensor; | |||
} | |||
} | |||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||
std::vector<PyObject*> p; | |||
p.resize(3); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
p[2] = tensor.ptr(); | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
return ret[1]; | |||
} | |||
py::tuple _try_cond_take(py::handle tensor, py::handle index) { | |||
if (!hasattr(index, "dtype") || !hasattr(index, "shape")) { | |||
return py::tuple(); | |||
} | |||
if (!is_bool_dtype(index.ptr()) || | |||
_make_shape_tuple(getattr(index, "shape")) | |||
.not_equal(_make_shape_tuple(getattr(tensor, "shape")))) { | |||
return py::tuple(); | |||
} | |||
py::object iobj; | |||
if (PyArray_Check(index.ptr())) { | |||
iobj = | |||
_Const(index, py::cast((mgb::DType)dtype::Bool()), | |||
getattr(tensor, "device"), tensor); | |||
} else { | |||
iobj = py::reinterpret_borrow<py::object>(index); | |||
} | |||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||
std::vector<PyObject*> p; | |||
p.resize(3); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
p[2] = iobj.ptr(); | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
return ret; | |||
} | |||
py::tuple _remove_ellipsis(py::object tensor, py::tuple tuple_val) { | |||
size_t tuple_size = tuple_val.size(); | |||
size_t ndim_sum = 0, cur_sum = 0; | |||
int pos = -1; | |||
bool has_unknown_ndim_bool_index = false; | |||
for (size_t i = 0; i < tuple_size; ++i) { | |||
py::object handle = tuple_val[i]; | |||
if (handle.ptr() == Py_Ellipsis) { | |||
pos = static_cast<int>(i); | |||
for (size_t j = 0; j < i; ++j) { | |||
py::object t = tuple_val[j]; | |||
if (t.ptr() == Py_Ellipsis) { | |||
throw py::index_error("only one ellipsis is allowed."); | |||
} | |||
} | |||
} else { | |||
size_t ndim_incr = 1; | |||
if (hasattr(handle, "dtype") && is_bool_dtype(handle.ptr()) && | |||
hasattr(handle, "ndim")) { | |||
py::object ndim = getattr(handle, "ndim"); | |||
if (PyLong_Check(ndim.ptr())) { | |||
ndim_incr = PyLong_AsLong(ndim.ptr()); | |||
} else { | |||
has_unknown_ndim_bool_index = true; | |||
} | |||
} | |||
cur_sum += ndim_incr; | |||
} | |||
} | |||
if (pos == -1) { | |||
return tuple_val; | |||
} else { | |||
if (has_unknown_ndim_bool_index) { | |||
throw py::index_error( | |||
"does not support bool index with unknown shape when using " | |||
"Ellipsis."); | |||
} | |||
try { | |||
ndim_sum = getattr(tensor, "ndim").cast<size_t>(); | |||
} catch (py::error_already_set& err) { | |||
throw py::index_error( | |||
"does not support Ellipsis when tensor's ndim is unknown."); | |||
} | |||
py::tuple ret(ndim_sum - cur_sum + tuple_size - 1); | |||
size_t idx = 0; | |||
for (size_t i = 0; i < tuple_size; ++i) { | |||
if (i == pos) { | |||
for (size_t j = cur_sum; j < ndim_sum; ++j) { | |||
ret[idx++] = PySlice_New(NULL, NULL, NULL); | |||
} | |||
} else { | |||
ret[idx++] = tuple_val[i]; | |||
} | |||
} | |||
return ret; | |||
} | |||
} | |||
py::tuple _expand_bool_dim(py::object tensor, py::tuple tuple_val) { | |||
py::tuple cur_shape = _make_shape_tuple(py::handle(getattr(tensor, "shape"))); | |||
py::list new_tuple_val(0); | |||
size_t offset = 0; | |||
size_t tdim = 0; | |||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||
py::handle k = tuple_val[i]; | |||
if (is_bool_dtype(k.ptr())) { | |||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||
if (ndim > 1) { | |||
py::tuple ishape = _make_shape_tuple(py::handle(getattr(k, "shape"))); | |||
for (size_t j = 0; j < ndim; ++j) { | |||
if (cur_shape[tdim + j - offset].cast<size_t>() != | |||
ishape[j].cast<size_t>()) { | |||
std::string msg = | |||
"boolean index did not match tensor along dimension " + | |||
std::to_string(tdim + j) + "; dimension is " + | |||
std::to_string( | |||
cur_shape[tdim + j - offset].cast<size_t>()) + | |||
" but corresponding boolean dimension is " + | |||
std::to_string(ishape[j].cast<size_t>()); | |||
throw py::index_error(msg.c_str()); | |||
} | |||
} | |||
py::object new_k = getattr(k, "reshape")(-1); | |||
py::object kshape = getattr(new_k, "shape"); | |||
py::list new_shape(0); | |||
PyObject* sym = PyObject_CallObject(cpp_use_symbolic_shape, nullptr); | |||
bool is_sym = (sym == Py_True); | |||
Py_XDECREF(sym); | |||
if (is_sym) { | |||
py::object tshape = getattr(tensor, "shape"); | |||
for (size_t j = 0; j < i; ++j) { | |||
new_shape.append(tshape[py::int_(j)]); | |||
} | |||
new_shape.append(kshape[py::int_(0)]); | |||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||
new_shape.append(cur_shape[j]); | |||
} | |||
py::tuple args = py::make_tuple(new_shape); | |||
PyObject* shape_tensor = | |||
PyObject_CallObject(cpp_astensor1d, args.ptr()); | |||
py::object reshape_func = getattr(tensor, "reshape"); | |||
Py_INCREF(shape_tensor); | |||
PyObject* Args = PyTuple_New(1); | |||
PyTuple_SetItem(Args, 0, shape_tensor); | |||
PyObject* new_tensor = | |||
PyObject_CallObject(reshape_func.ptr(), Args); | |||
Py_XDECREF(Args); | |||
tensor = py::reinterpret_steal<py::object>(new_tensor); | |||
cur_shape = _make_shape_tuple(py::handle(shape_tensor)); | |||
Py_XDECREF(shape_tensor); | |||
} else { | |||
for (size_t j = 0; j < i; ++j) { | |||
new_shape.append(cur_shape[j]); | |||
} | |||
new_shape.append(py::reinterpret_borrow<py::tuple>(kshape)[0]); | |||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||
new_shape.append(cur_shape[j]); | |||
} | |||
cur_shape = new_shape; | |||
tensor = getattr(tensor, "reshape")(cur_shape); | |||
} | |||
offset++; | |||
tdim += ndim; | |||
} | |||
new_tuple_val.append(k); | |||
} else { | |||
new_tuple_val.append(k); | |||
tdim++; | |||
} | |||
} | |||
return py::make_tuple(tensor, py::reinterpret_borrow<py::tuple>(new_tuple_val)); | |||
} | |||
py::tuple _unpack_indexes(py::handle inp_hdl, py::handle idx_hdl) { | |||
py::object inp = py::reinterpret_borrow<py::object>(inp_hdl); | |||
py::tuple tuple_val; | |||
if (py::isinstance<py::tuple>(idx_hdl)) { | |||
tuple_val = py::reinterpret_borrow<py::tuple>(idx_hdl); | |||
} else { | |||
tuple_val = py::make_tuple(idx_hdl); | |||
} | |||
bool use_subtensor = true; | |||
bool need_remove_ellipsis = false; | |||
bool need_expand_bool_dim = false; | |||
size_t idx_ndim = 0; | |||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||
py::object k = tuple_val[i]; | |||
if (k.ptr() == Py_None) { | |||
throw py::index_error("newaxis is not allowed here"); | |||
} else if (k.ptr() == Py_Ellipsis) { | |||
need_remove_ellipsis = true; | |||
} else { | |||
if (is_bool_dtype(k.ptr()) && hasattr(k, "ndim")) { | |||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||
idx_ndim += ndim; | |||
if (ndim > 1) { | |||
need_expand_bool_dim = true; | |||
} | |||
} else { | |||
idx_ndim++; | |||
} | |||
} | |||
} | |||
try { | |||
size_t inp_ndim = getattr(inp, "ndim").cast<size_t>(); | |||
if (idx_ndim > inp_ndim) { | |||
std::string msg = "too many indices for tensor: tensor is " + | |||
std::to_string(inp_ndim) + "-dimensional, but " + | |||
std::to_string(idx_ndim) + " were indexed"; | |||
throw py::index_error(msg.c_str()); | |||
} | |||
} catch (py::error_already_set& err) { | |||
; // ignore | |||
} | |||
if (need_remove_ellipsis) { | |||
tuple_val = _remove_ellipsis(inp, tuple_val); | |||
} | |||
if (need_expand_bool_dim) { | |||
py::object shape = getattr(inp, "shape"); | |||
if (shape.ptr() != Py_None) { | |||
py::tuple ret = _expand_bool_dim(inp, tuple_val); | |||
inp = ret[0]; | |||
tuple_val = ret[1]; | |||
} | |||
} | |||
py::list items; | |||
py::list tensors; | |||
int cur_axis = -1; | |||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||
py::object handle = tuple_val[i]; | |||
cur_axis++; | |||
if (!is_scalar(handle.ptr()) && !PySlice_Check(handle.ptr())) { | |||
use_subtensor = false; | |||
} | |||
py::list item; | |||
item.append(cur_axis); | |||
auto push = [&](PyObject* v) { | |||
if (v == Py_None) { | |||
item.append(false); | |||
} else { | |||
item.append(true); | |||
tensors.append(_get_index(py::reinterpret_borrow<py::object>(v), inp)); | |||
} | |||
}; | |||
if (PySlice_Check(handle.ptr())) { | |||
PySliceObject* s = (PySliceObject*)handle.ptr(); | |||
if (s->start == Py_None && s->stop == Py_None && s->step == Py_None) { | |||
continue; | |||
} | |||
push(s->start); | |||
push(s->stop); | |||
push(s->step); | |||
item.append(false); | |||
} else { | |||
for (size_t j = 0; j < 3; j++) | |||
item.append(false); | |||
push(handle.ptr()); | |||
} | |||
items.append(item); | |||
} | |||
return py::make_tuple(inp, tensors, items, use_subtensor, need_expand_bool_dim); | |||
} | |||
py::object _getitem_cpp(py::handle inp_hdl, py::handle idx_hdl) { | |||
py::tuple try_res = _try_cond_take(inp_hdl, idx_hdl); | |||
if (try_res.size() == 2) { | |||
return try_res[0]; | |||
} | |||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||
for (size_t i = 0; i < py_items.size(); ++i) { | |||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||
cpp_items.push_back( | |||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||
} | |||
static std::shared_ptr<OpDef> op; | |||
if (up[3].cast<bool>()) { | |||
op = Subtensor::make(cpp_items); | |||
} else { | |||
op = IndexingMultiAxisVec::make(cpp_items); | |||
} | |||
std::vector<PyObject*> p; | |||
p.resize(tensors.size() + 2); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
for (size_t i = 0; i < tensors.size(); ++i) { | |||
p[i + 2] = tensors[i].ptr(); | |||
} | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
return ret[0]; | |||
} | |||
py::object _setitem_cpp(py::handle inp_hdl, py::handle idx_hdl, py::handle val_hdl) { | |||
py::object org_shape = getattr(inp_hdl, "shape"); | |||
py::object val = py::reinterpret_borrow<py::object>(val_hdl); | |||
if (!TensorWrapper::try_cast(val.ptr()) && !py::isinstance<PySymbolVar>(val)) { | |||
val = | |||
_Const(val_hdl, getattr(inp_hdl, "dtype"), getattr(inp_hdl, "device"), | |||
inp_hdl); | |||
} | |||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||
for (size_t i = 0; i < py_items.size(); ++i) { | |||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||
cpp_items.push_back( | |||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||
} | |||
static std::shared_ptr<OpDef> op, set_op; | |||
if (up[3].cast<bool>()) { | |||
op = Subtensor::make(cpp_items); | |||
} else { | |||
op = IndexingMultiAxisVec::make(cpp_items); | |||
} | |||
std::vector<PyObject*> p; | |||
p.resize(tensors.size() + 2); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
for (size_t i = 0; i < tensors.size(); ++i) { | |||
p[i + 2] = tensors[i].ptr(); | |||
} | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
py::object tmp_result = ret[0]; | |||
try { | |||
py::object value_tuple_shape = val.attr("_tuple_shape"); | |||
py::object tmp_result_tuple_shape = tmp_result.attr("_tuple_shape"); | |||
py::tuple value_shape = py::reinterpret_borrow<py::tuple>(value_tuple_shape); | |||
py::tuple tmp_result_shape = | |||
py::reinterpret_borrow<py::tuple>(tmp_result_tuple_shape); | |||
for (size_t i = 0; i < value_shape.size() && i < tmp_result_shape.size(); ++i) { | |||
size_t vs = value_shape[value_shape.size() - i - 1].cast<size_t>(); | |||
size_t ts = | |||
tmp_result_shape[tmp_result_shape.size() - i - 1].cast<size_t>(); | |||
if (vs != 1 && vs != ts) { | |||
std::string lhs = "", rhs = ""; | |||
for (size_t j = 0; j < tmp_result_shape.size(); ++j) { | |||
lhs += std::to_string(tmp_result_shape[j].cast<size_t>()); | |||
if (j) | |||
lhs += ","; | |||
} | |||
for (size_t j = 0; j < value_shape.size(); ++j) { | |||
rhs += std::to_string(value_shape[j].cast<size_t>()); | |||
if (j) | |||
rhs += ","; | |||
} | |||
throw py::value_error( | |||
"cannot copy tensor with shape (" + rhs + | |||
") to subtensor with shape (" + lhs + ")"); | |||
} | |||
} | |||
} catch (py::error_already_set& err) { | |||
; | |||
} | |||
py::object broadcast_func = getattr(val, "_broadcast"); | |||
PyObject* Args = PyTuple_New(1); | |||
PyTuple_SetItem(Args, 0, getattr(tmp_result, "shape").release().ptr()); | |||
PyObject* new_val = PyObject_CallObject(broadcast_func.ptr(), Args); | |||
Py_XDECREF(Args); | |||
val = py::reinterpret_steal<py::object>(new_val); | |||
if (up[3].cast<bool>()) { | |||
set_op = SetSubtensor::make(cpp_items); | |||
} else { | |||
set_op = IndexingSetMultiAxisVec::make(cpp_items); | |||
} | |||
std::vector<PyObject*> q; | |||
q.resize(tensors.size() + 3); | |||
py::object Set_Op = py::cast(set_op); | |||
q[0] = Set_Op.ptr(); | |||
q[1] = tensor.ptr(); | |||
q[2] = val.ptr(); | |||
for (size_t i = 0; i < tensors.size(); ++i) { | |||
q[i + 3] = tensors[i].ptr(); | |||
} | |||
py::tuple result = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, q.data(), q.size())); | |||
py::object res = result[0]; | |||
if (up[4].cast<bool>()) { | |||
py::object reshape_func = getattr(res, "reshape"); | |||
PyObject* Args = PyTuple_New(1); | |||
PyTuple_SetItem(Args, 0, org_shape.release().ptr()); | |||
PyObject* new_tensor = PyObject_CallObject(reshape_func.ptr(), Args); | |||
Py_XDECREF(Args); | |||
res = py::reinterpret_steal<py::object>(new_tensor); | |||
} | |||
return res; | |||
} | |||
// Returns the dtype that would result from performing an arithmetic | |||
// operation on the provided input tensors and scalars. | |||
PyObject* dtype_promotion(PyObject* self, PyObject* const* args, size_t nargs) { | |||
@@ -1126,30 +576,6 @@ PyObject* get_device(PyObject* self, PyObject* const* args, size_t nargs) { | |||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||
} | |||
PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs) { | |||
try { | |||
return _make_shape_tuple(py::handle(args[0])).release().ptr(); | |||
} | |||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||
} | |||
PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||
try { | |||
return _getitem_cpp(py::handle(args[0]), py::handle(args[1])).release().ptr(); | |||
} | |||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||
} | |||
PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||
try { | |||
return _setitem_cpp( | |||
py::handle(args[0]), py::handle(args[1]), py::handle(args[2])) | |||
.release() | |||
.ptr(); | |||
} | |||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||
} | |||
#ifdef METH_FASTCALL | |||
#define MGE_PY_INTERFACE(NAME, FUNC) \ | |||
{ #NAME, (PyCFunction)FUNC, METH_FASTCALL, nullptr } | |||
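The indexing entry points moved to tensor_utils.cpp keep the METH_FASTCALL convention, so their registration stays in this form (a sketch only; the method table itself is outside this hunk and its name is illustrative):

static PyMethodDef tensor_methods[] = {
        MGE_PY_INTERFACE(make_shape_tuple, make_shape_tuple),
        MGE_PY_INTERFACE(getitem_cpp, getitem_cpp),
        MGE_PY_INTERFACE(setitem_cpp, setitem_cpp),
        // ... remaining entries unchanged ...
        {nullptr, nullptr, 0, nullptr},
};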
@@ -38,6 +38,8 @@ namespace mgb::imperative::python { | |||
extern interpreter::Interpreter::Channel* interpreter_for_py; | |||
extern PyTypeObject* py_tensor_type; | |||
extern PyObject* cpp_use_symbolic_shape; | |||
extern PyObject* cpp_astensor1d; | |||
struct Tensor : NonCopyableObj { | |||
private: | |||
@@ -0,0 +1,630 @@ | |||
/** | |||
* \file imperative/python/src/tensor_utils.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "megbrain/common.h" | |||
#include "megbrain/dtype.h" | |||
#include "megbrain/imperative/ops/autogen.h" | |||
#include "megbrain/imperative/ops/backward_graph.h" | |||
#include "megbrain/imperative/ops/utility.h" | |||
#include "megbrain/imperative/profiler.h" | |||
#include "megbrain/imperative/transformations/eval.h" | |||
#include "megbrain/imperative/transformations/lazy.h" | |||
#include "megbrain/imperative/transformations/scalar.h" | |||
#include "megbrain/imperative/transformations/symbol.h" | |||
#include "megbrain/imperative/transformations/trace.h" | |||
#include "megbrain/imperative/utils/map.h" | |||
#include "megbrain/imperative/utils/stats.h" | |||
#include "megbrain/opr/io.h" | |||
#include "megbrain/plugin/profiler.h" | |||
#include "./common.h" | |||
#include "./grad.h" | |||
#include "./graph_rt.h" | |||
#include "./helper.h" | |||
#include "./module_trace.h" | |||
#include "./numpy_dtypes.h" | |||
#include "./tensor.h" | |||
#include "./tensor_utils.h" | |||
#include "./transformation.h" | |||
#include <object.h> | |||
#include <pybind11/numpy.h> | |||
#include <pybind11/operators.h> | |||
#include <pybind11/pytypes.h> | |||
#include <pyerrors.h> | |||
#include <range/v3/all.hpp> | |||
#include <string> | |||
#include <unordered_map> | |||
#include "../../src/impl/mgb_cg_impl.h" | |||
namespace py = pybind11; | |||
namespace views = ranges::views; | |||
namespace mgb::imperative::python { | |||
bool is_scalar(PyObject* tensor) { | |||
if (py::isinstance<PySymbolVar>(py::handle(tensor))) { | |||
auto var = py::handle(tensor).cast<PySymbolVar*>(); | |||
return var->is_scalar; | |||
} | |||
auto* tw = TensorWrapper::try_cast(tensor); | |||
if (tw) { | |||
return tw->m_tensor->is_scalar(); | |||
} | |||
return PyArray_CheckAnyScalar(tensor); | |||
} | |||
bool is_bool_list(PyObject* arg) { | |||
if (!PyList_Check(arg)) { | |||
return false; | |||
} | |||
size_t sz = PyList_Size(arg); | |||
if (!sz) { | |||
return false; | |||
} | |||
for (size_t i = 0; i < sz; ++i) { | |||
PyObject* handle = PyList_GetItem(arg, i); | |||
if (!PyBool_Check(handle)) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
bool is_bool_dtype(PyObject* args) { | |||
if (!PyObject_HasAttrString(args, "dtype")) | |||
return false; | |||
PyObject* dobj = PyObject_GetAttrString(args, "dtype"); | |||
PyArray_Descr* dtype; | |||
PyArray_DescrConverter(dobj, &dtype); | |||
bool ret = (dtype->kind == 'b'); | |||
Py_XDECREF(dtype); | |||
Py_XDECREF(dobj); | |||
return ret; | |||
} | |||
py::object _Const( | |||
py::handle value, py::handle dtype, py::handle device, py::handle ref) { | |||
py::object val = py::reinterpret_borrow<py::object>(value); | |||
if (PyArray_Check(value.ptr())) { | |||
py::tuple strides = | |||
py::reinterpret_borrow<py::tuple>(getattr(value, "strides")); | |||
bool need_squeeze = false; | |||
for (size_t i = 0; i < strides.size(); ++i) { | |||
if (strides[i].cast<ptrdiff_t>() == 0) { | |||
need_squeeze = true; | |||
} | |||
} | |||
if (need_squeeze) { | |||
val = py::reinterpret_borrow<py::array>(value); | |||
val = val.attr("squeeze")(); | |||
val = val.attr("reshape")(val.attr("shape")); | |||
} | |||
} | |||
if (py::isinstance<PySymbolVar>(ref)) { | |||
auto ref_var = ref.cast<PySymbolVar*>(); | |||
auto* graph = ref_var->m_node->owner_graph(); | |||
auto cn = device.cast<CompNode>(); | |||
OperatorNodeConfig config(cn); | |||
auto hv = npy::np2tensor( | |||
val.ptr(), npy::Meth::borrow(cn), dtype.cast<mgb::DType>()); | |||
auto typeobj = ref.get_type(); | |||
return typeobj(opr::ImmutableTensor::make(*graph, hv, config).node()); | |||
} | |||
py::tuple tup = py::make_tuple(val, dtype, device, true, false, py::none()); | |||
return TensorWrapper::make(py_tensor_type, tup.ptr(), nullptr); | |||
} | |||
py::tuple _make_shape_tuple(py::handle shape) { | |||
py::list orig; | |||
py::list ret(0); | |||
auto solve_one = [&](py::handle val) { | |||
if (TensorWrapper::try_cast(val.ptr()) || py::isinstance<PySymbolVar>(val)) { | |||
py::object np = getattr(val, "numpy")(); | |||
PyArrayObject* arr = (PyArrayObject*)np.ptr(); | |||
PyObject* maybe_list = PyArray_ToList(arr); | |||
if (PyList_Check(maybe_list)) { | |||
py::list may = py::reinterpret_steal<py::list>(maybe_list); | |||
for (size_t i = 0; i < may.size(); ++i) { | |||
ret.append(may[i]); | |||
} | |||
} else { | |||
mgb_assert(PyLong_Check(maybe_list)); | |||
ret.append(PyLong_AsLong(maybe_list)); | |||
Py_XDECREF(maybe_list); | |||
} | |||
} else if (PyArray_Check(val.ptr())) { | |||
ret.append(PyArray_PyIntAsInt(val.ptr())); | |||
} else { | |||
ret.append(PyLong_AsLong(val.ptr())); | |||
} | |||
}; | |||
if (PyArray_Check(shape.ptr()) && !PyArray_CheckAnyScalar(shape.ptr())) { | |||
orig = py::reinterpret_steal<py::list>( | |||
PyArray_ToList((PyArrayObject*)shape.ptr())); | |||
for (size_t i = 0; i < orig.size(); ++i) { | |||
solve_one(orig[i]); | |||
} | |||
} else if (PyList_Check(shape.ptr())) { | |||
orig = py::reinterpret_borrow<py::list>(shape); | |||
for (size_t i = 0; i < orig.size(); ++i) { | |||
solve_one(orig[i]); | |||
} | |||
} else if (PyTuple_Check(shape.ptr())) { | |||
py::tuple tup = py::reinterpret_borrow<py::tuple>(shape); | |||
for (size_t i = 0; i < tup.size(); ++i) { | |||
solve_one(tup[i]); | |||
} | |||
} else { | |||
solve_one(shape); | |||
} | |||
return py::reinterpret_steal<py::tuple>(PyList_AsTuple(ret.ptr())); | |||
} | |||
py::object _get_index(py::object tensor, py::object src) { | |||
if (!TensorWrapper::try_cast(tensor.ptr()) && | |||
!py::isinstance<PySymbolVar>(tensor)) { | |||
auto get_const = [&](mgb::DType dtype) -> py::object { | |||
return _Const(tensor, py::cast(dtype), src.attr("device"), src); | |||
}; | |||
if (is_bool_list(tensor.ptr()) || is_bool_dtype(tensor.ptr())) { | |||
tensor = get_const(dtype::Bool()); | |||
} else { | |||
tensor = get_const(dtype::Int32()); | |||
} | |||
if (!is_bool_dtype(tensor.ptr())) { | |||
return tensor; | |||
} | |||
} else { | |||
if (!is_bool_dtype(tensor.ptr())) { | |||
return tensor; | |||
} | |||
} | |||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||
std::vector<PyObject*> p; | |||
p.resize(3); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
p[2] = tensor.ptr(); | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
return ret[1]; | |||
} | |||
py::tuple _try_cond_take(py::handle tensor, py::handle index) { | |||
if (!hasattr(index, "dtype") || !hasattr(index, "shape")) { | |||
return py::tuple(); | |||
} | |||
if (!is_bool_dtype(index.ptr()) || | |||
_make_shape_tuple(getattr(index, "shape")) | |||
.not_equal(_make_shape_tuple(getattr(tensor, "shape")))) { | |||
return py::tuple(); | |||
} | |||
py::object iobj; | |||
if (PyArray_Check(index.ptr())) { | |||
iobj = | |||
_Const(index, py::cast((mgb::DType)dtype::Bool()), | |||
getattr(tensor, "device"), tensor); | |||
} else { | |||
iobj = py::reinterpret_borrow<py::object>(index); | |||
} | |||
static std::shared_ptr<OpDef> op = CondTake::make(); | |||
std::vector<PyObject*> p; | |||
p.resize(3); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
p[2] = iobj.ptr(); | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
return ret; | |||
} | |||
py::tuple _remove_ellipsis(py::object tensor, py::tuple tuple_val) { | |||
size_t tuple_size = tuple_val.size(); | |||
size_t ndim_sum = 0, cur_sum = 0; | |||
int pos = -1; | |||
bool has_unknown_ndim_bool_index = false; | |||
for (size_t i = 0; i < tuple_size; ++i) { | |||
py::object handle = tuple_val[i]; | |||
if (handle.ptr() == Py_Ellipsis) { | |||
pos = static_cast<int>(i); | |||
for (size_t j = 0; j < i; ++j) { | |||
py::object t = tuple_val[j]; | |||
if (t.ptr() == Py_Ellipsis) { | |||
throw py::index_error("only one ellipsis is allowed."); | |||
} | |||
} | |||
} else { | |||
size_t ndim_incr = 1; | |||
if (hasattr(handle, "dtype") && is_bool_dtype(handle.ptr()) && | |||
hasattr(handle, "ndim")) { | |||
py::object ndim = getattr(handle, "ndim"); | |||
if (PyLong_Check(ndim.ptr())) { | |||
ndim_incr = PyLong_AsLong(ndim.ptr()); | |||
} else { | |||
has_unknown_ndim_bool_index = true; | |||
} | |||
} | |||
cur_sum += ndim_incr; | |||
} | |||
} | |||
if (pos == -1) { | |||
return tuple_val; | |||
} else { | |||
if (has_unknown_ndim_bool_index) { | |||
throw py::index_error( | |||
"does not support bool index with unknown shape when using " | |||
"Ellipsis."); | |||
} | |||
try { | |||
ndim_sum = getattr(tensor, "ndim").cast<size_t>(); | |||
} catch (py::error_already_set& err) { | |||
throw py::index_error( | |||
"does not support Ellipsis when tensor's ndim is unknown."); | |||
} | |||
py::tuple ret(ndim_sum - cur_sum + tuple_size - 1); | |||
size_t idx = 0; | |||
for (size_t i = 0; i < tuple_size; ++i) { | |||
if (i == pos) { | |||
for (size_t j = cur_sum; j < ndim_sum; ++j) { | |||
ret[idx++] = PySlice_New(NULL, NULL, NULL); | |||
} | |||
} else { | |||
ret[idx++] = tuple_val[i]; | |||
} | |||
} | |||
return ret; | |||
} | |||
} | |||
py::tuple _expand_bool_dim(py::object tensor, py::tuple tuple_val) { | |||
py::tuple cur_shape = _make_shape_tuple(py::handle(getattr(tensor, "shape"))); | |||
py::list new_tuple_val(0); | |||
size_t offset = 0; | |||
size_t tdim = 0; | |||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||
py::handle k = tuple_val[i]; | |||
if (is_bool_dtype(k.ptr())) { | |||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||
if (ndim > 1) { | |||
py::tuple ishape = _make_shape_tuple(py::handle(getattr(k, "shape"))); | |||
for (size_t j = 0; j < ndim; ++j) { | |||
if (cur_shape[tdim + j - offset].cast<size_t>() != | |||
ishape[j].cast<size_t>()) { | |||
std::string msg = | |||
"boolean index did not match tensor along dimension " + | |||
std::to_string(tdim + j) + "; dimension is " + | |||
std::to_string( | |||
cur_shape[tdim + j - offset].cast<size_t>()) + | |||
" but corresponding boolean dimension is " + | |||
std::to_string(ishape[j].cast<size_t>()); | |||
throw py::index_error(msg.c_str()); | |||
} | |||
} | |||
py::object new_k = getattr(k, "reshape")(-1); | |||
py::object kshape = getattr(new_k, "shape"); | |||
py::list new_shape(0); | |||
PyObject* sym = PyObject_CallObject(cpp_use_symbolic_shape, nullptr); | |||
bool is_sym = (sym == Py_True); | |||
Py_XDECREF(sym); | |||
if (is_sym) { | |||
py::object tshape = getattr(tensor, "shape"); | |||
for (size_t j = 0; j < i; ++j) { | |||
new_shape.append(tshape[py::int_(j)]); | |||
} | |||
new_shape.append(kshape[py::int_(0)]); | |||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||
new_shape.append(cur_shape[j]); | |||
} | |||
py::tuple args = py::make_tuple(new_shape); | |||
PyObject* shape_tensor = | |||
PyObject_CallObject(cpp_astensor1d, args.ptr()); | |||
py::object reshape_func = getattr(tensor, "reshape"); | |||
Py_INCREF(shape_tensor); | |||
PyObject* Args = PyTuple_New(1); | |||
PyTuple_SetItem(Args, 0, shape_tensor); | |||
PyObject* new_tensor = | |||
PyObject_CallObject(reshape_func.ptr(), Args); | |||
Py_XDECREF(Args); | |||
tensor = py::reinterpret_steal<py::object>(new_tensor); | |||
cur_shape = _make_shape_tuple(py::handle(shape_tensor)); | |||
Py_XDECREF(shape_tensor); | |||
} else { | |||
for (size_t j = 0; j < i; ++j) { | |||
new_shape.append(cur_shape[j]); | |||
} | |||
new_shape.append(py::reinterpret_borrow<py::tuple>(kshape)[0]); | |||
for (size_t j = tdim + ndim - offset; j < cur_shape.size(); ++j) { | |||
new_shape.append(cur_shape[j]); | |||
} | |||
cur_shape = new_shape; | |||
tensor = getattr(tensor, "reshape")(cur_shape); | |||
} | |||
offset++; | |||
tdim += ndim; | |||
} | |||
new_tuple_val.append(k); | |||
} else { | |||
new_tuple_val.append(k); | |||
tdim++; | |||
} | |||
} | |||
return py::make_tuple(tensor, py::reinterpret_borrow<py::tuple>(new_tuple_val)); | |||
} | |||
py::tuple _unpack_indexes(py::handle inp_hdl, py::handle idx_hdl) { | |||
py::object inp = py::reinterpret_borrow<py::object>(inp_hdl); | |||
py::tuple tuple_val; | |||
if (py::isinstance<py::tuple>(idx_hdl)) { | |||
tuple_val = py::reinterpret_borrow<py::tuple>(idx_hdl); | |||
} else { | |||
tuple_val = py::make_tuple(idx_hdl); | |||
} | |||
bool use_subtensor = true; | |||
bool need_remove_ellipsis = false; | |||
bool need_expand_bool_dim = false; | |||
size_t idx_ndim = 0; | |||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||
py::object k = tuple_val[i]; | |||
if (k.ptr() == Py_None) { | |||
throw py::index_error("newaxis is not allowed here"); | |||
} else if (k.ptr() == Py_Ellipsis) { | |||
need_remove_ellipsis = true; | |||
} else { | |||
if (is_bool_dtype(k.ptr()) && hasattr(k, "ndim")) { | |||
size_t ndim = getattr(k, "ndim").cast<size_t>(); | |||
idx_ndim += ndim; | |||
if (ndim > 1) { | |||
need_expand_bool_dim = true; | |||
} | |||
} else { | |||
idx_ndim++; | |||
} | |||
} | |||
} | |||
try { | |||
size_t inp_ndim = getattr(inp, "ndim").cast<size_t>(); | |||
if (idx_ndim > inp_ndim) { | |||
std::string msg = "too many indices for tensor: tensor is " + | |||
std::to_string(inp_ndim) + "-dimensional, but " + | |||
std::to_string(idx_ndim) + " were indexed"; | |||
throw py::index_error(msg.c_str()); | |||
} | |||
} catch (py::error_already_set& err) { | |||
; // ignore | |||
} | |||
if (need_remove_ellipsis) { | |||
tuple_val = _remove_ellipsis(inp, tuple_val); | |||
} | |||
if (need_expand_bool_dim) { | |||
py::object shape = getattr(inp, "shape"); | |||
if (shape.ptr() != Py_None) { | |||
py::tuple ret = _expand_bool_dim(inp, tuple_val); | |||
inp = ret[0]; | |||
tuple_val = ret[1]; | |||
} | |||
} | |||
py::list items; | |||
py::list tensors; | |||
int cur_axis = -1; | |||
for (size_t i = 0; i < tuple_val.size(); ++i) { | |||
py::object handle = tuple_val[i]; | |||
cur_axis++; | |||
if (!is_scalar(handle.ptr()) && !PySlice_Check(handle.ptr())) { | |||
use_subtensor = false; | |||
} | |||
py::list item; | |||
item.append(cur_axis); | |||
auto push = [&](PyObject* v) { | |||
if (v == Py_None) { | |||
item.append(false); | |||
} else { | |||
item.append(true); | |||
tensors.append(_get_index(py::reinterpret_borrow<py::object>(v), inp)); | |||
} | |||
}; | |||
if (PySlice_Check(handle.ptr())) { | |||
PySliceObject* s = (PySliceObject*)handle.ptr(); | |||
if (s->start == Py_None && s->stop == Py_None && s->step == Py_None) { | |||
continue; | |||
} | |||
push(s->start); | |||
push(s->stop); | |||
push(s->step); | |||
item.append(false); | |||
} else { | |||
for (size_t j = 0; j < 3; j++) | |||
item.append(false); | |||
push(handle.ptr()); | |||
} | |||
items.append(item); | |||
} | |||
return py::make_tuple(inp, tensors, items, use_subtensor, need_expand_bool_dim); | |||
} | |||
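For illustration, what _unpack_indexes yields for a Python-level inp[1:3, 2], worked through from the code above (not an additional contract):

// items   = [[0, true, true, false, false],   axis 0: begin/end present, no step, no idx
//            [1, false, false, false, true]]  axis 1: a single index
// tensors = {1, 3, 2}, converted to Int32 tensors by _get_index
// use_subtensor = true, so _getitem_cpp below builds a Subtensor op; an index
// that is neither scalar nor slice, e.g. inp[[0, 2]], flips it to false and
// selects IndexingMultiAxisVec instead.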
py::object _getitem_cpp(py::handle inp_hdl, py::handle idx_hdl) { | |||
py::tuple try_res = _try_cond_take(inp_hdl, idx_hdl); | |||
if (try_res.size() == 2) { | |||
return try_res[0]; | |||
} | |||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||
for (size_t i = 0; i < py_items.size(); ++i) { | |||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||
cpp_items.push_back( | |||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||
} | |||
static std::shared_ptr<OpDef> op; | |||
if (up[3].cast<bool>()) { | |||
op = Subtensor::make(cpp_items); | |||
} else { | |||
op = IndexingMultiAxisVec::make(cpp_items); | |||
} | |||
std::vector<PyObject*> p; | |||
p.resize(tensors.size() + 2); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
for (size_t i = 0; i < tensors.size(); ++i) { | |||
p[i + 2] = tensors[i].ptr(); | |||
} | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
return ret[0]; | |||
} | |||
py::object _setitem_cpp(py::handle inp_hdl, py::handle idx_hdl, py::handle val_hdl) { | |||
py::object org_shape = getattr(inp_hdl, "shape"); | |||
py::object val = py::reinterpret_borrow<py::object>(val_hdl); | |||
if (!TensorWrapper::try_cast(val.ptr()) && !py::isinstance<PySymbolVar>(val)) { | |||
val = | |||
_Const(val_hdl, getattr(inp_hdl, "dtype"), getattr(inp_hdl, "device"), | |||
inp_hdl); | |||
} | |||
py::tuple up = _unpack_indexes(inp_hdl, idx_hdl); | |||
py::object tensor = py::reinterpret_borrow<py::object>(up[0]); | |||
py::list tensors = py::reinterpret_borrow<py::list>(up[1]); | |||
py::list py_items = py::reinterpret_borrow<py::list>(up[2]); | |||
std::vector<std::tuple<int8_t, bool, bool, bool, bool>> cpp_items; | |||
for (size_t i = 0; i < py_items.size(); ++i) { | |||
py::list item = py::reinterpret_borrow<py::list>(py_items[i]); | |||
cpp_items.push_back( | |||
{item[0].cast<int8_t>(), item[1].cast<bool>(), item[2].cast<bool>(), | |||
item[3].cast<bool>(), item[4].cast<bool>()}); | |||
} | |||
static std::shared_ptr<OpDef> op, set_op; | |||
if (up[3].cast<bool>()) { | |||
op = Subtensor::make(cpp_items); | |||
} else { | |||
op = IndexingMultiAxisVec::make(cpp_items); | |||
} | |||
std::vector<PyObject*> p; | |||
p.resize(tensors.size() + 2); | |||
py::object Op = py::cast(op); | |||
p[0] = Op.ptr(); | |||
p[1] = tensor.ptr(); | |||
for (size_t i = 0; i < tensors.size(); ++i) { | |||
p[i + 2] = tensors[i].ptr(); | |||
} | |||
py::tuple ret = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, p.data(), p.size())); | |||
py::object tmp_result = ret[0]; | |||
try { | |||
py::object value_tuple_shape = val.attr("_tuple_shape"); | |||
py::object tmp_result_tuple_shape = tmp_result.attr("_tuple_shape"); | |||
py::tuple value_shape = py::reinterpret_borrow<py::tuple>(value_tuple_shape); | |||
py::tuple tmp_result_shape = | |||
py::reinterpret_borrow<py::tuple>(tmp_result_tuple_shape); | |||
for (size_t i = 0; i < value_shape.size() && i < tmp_result_shape.size(); ++i) { | |||
size_t vs = value_shape[value_shape.size() - i - 1].cast<size_t>(); | |||
size_t ts = | |||
tmp_result_shape[tmp_result_shape.size() - i - 1].cast<size_t>(); | |||
if (vs != 1 && vs != ts) { | |||
std::string lhs = "", rhs = ""; | |||
for (size_t j = 0; j < tmp_result_shape.size(); ++j) { | |||
lhs += std::to_string(tmp_result_shape[j].cast<size_t>()); | |||
if (j) | |||
lhs += ","; | |||
} | |||
for (size_t j = 0; j < value_shape.size(); ++j) { | |||
rhs += std::to_string(value_shape[j].cast<size_t>()); | |||
if (j) | |||
rhs += ","; | |||
} | |||
throw py::value_error( | |||
"cannot copy tensor with shape (" + rhs + | |||
") to subtensor with shape (" + lhs + ")"); | |||
} | |||
} | |||
} catch (py::error_already_set& err) { | |||
; | |||
} | |||
py::object broadcast_func = getattr(val, "_broadcast"); | |||
PyObject* Args = PyTuple_New(1); | |||
PyTuple_SetItem(Args, 0, getattr(tmp_result, "shape").release().ptr()); | |||
PyObject* new_val = PyObject_CallObject(broadcast_func.ptr(), Args); | |||
Py_XDECREF(Args); | |||
val = py::reinterpret_steal<py::object>(new_val); | |||
if (up[3].cast<bool>()) { | |||
set_op = SetSubtensor::make(cpp_items); | |||
} else { | |||
set_op = IndexingSetMultiAxisVec::make(cpp_items); | |||
} | |||
std::vector<PyObject*> q; | |||
q.resize(tensors.size() + 3); | |||
py::object Set_Op = py::cast(set_op); | |||
q[0] = Set_Op.ptr(); | |||
q[1] = tensor.ptr(); | |||
q[2] = val.ptr(); | |||
for (size_t i = 0; i < tensors.size(); ++i) { | |||
q[i + 3] = tensors[i].ptr(); | |||
} | |||
py::tuple result = | |||
py::reinterpret_steal<py::object>(py_apply(NULL, q.data(), q.size())); | |||
py::object res = result[0]; | |||
if (up[4].cast<bool>()) { | |||
py::object reshape_func = getattr(res, "reshape"); | |||
PyObject* Args = PyTuple_New(1); | |||
PyTuple_SetItem(Args, 0, org_shape.release().ptr()); | |||
PyObject* new_tensor = PyObject_CallObject(reshape_func.ptr(), Args); | |||
Py_XDECREF(Args); | |||
res = py::reinterpret_steal<py::object>(new_tensor); | |||
} | |||
return res; | |||
} | |||
PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs) { | |||
try { | |||
return _make_shape_tuple(py::handle(args[0])).release().ptr(); | |||
} | |||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||
} | |||
PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||
try { | |||
return _getitem_cpp(py::handle(args[0]), py::handle(args[1])).release().ptr(); | |||
} | |||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||
} | |||
PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs) { | |||
try { | |||
return _setitem_cpp( | |||
py::handle(args[0]), py::handle(args[1]), py::handle(args[2])) | |||
.release() | |||
.ptr(); | |||
} | |||
PYEXT17_TRANSLATE_EXC_RET(nullptr) | |||
} | |||
} // namespace mgb::imperative::python |
@@ -0,0 +1,11 @@ | |||
#pragma once | |||
namespace mgb::imperative::python { | |||
PyObject* make_shape_tuple(PyObject* self, PyObject* const* args, size_t nargs); | |||
PyObject* getitem_cpp(PyObject* self, PyObject* const* args, size_t nargs); | |||
PyObject* setitem_cpp(PyObject* self, PyObject* const* args, size_t nargs); | |||
} // namespace mgb::imperative::python |
@@ -642,7 +642,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr) { | |||
m_dtr.update_used_time(dest); | |||
MGB_RECORD_EVENT( | |||
TensorProduceEvent, dest->id, ptr->layout(), ptr->comp_node(), | |||
ptr->dev_tensor().raw_ptr()); | |||
ptr->dev_tensor(false).raw_ptr()); | |||
// update tensor desc for static infer | |||
if (dest->desc.layout.ndim) { | |||
mgb_assert( | |||
@@ -730,10 +730,20 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) { | |||
inputs, apply_functor, const_functor); | |||
return outputs; | |||
} | |||
return OpDef::apply_on_physical_tensor(def, inputs, output_descs, validated); | |||
// Check input layouts: query each input's layout constraint and, if a | |||
// constraint is not satisfied, update the layout and blob in place to make | |||
// the tensor contiguous. | |||
auto&& constraints = OpDef::get_input_layout_constraint(def, inputs); | |||
for (size_t idx = 0; idx < inputs.size(); ++idx) { | |||
auto&& layout_checker = constraints[idx]; | |||
if (layout_checker) { | |||
inputs[idx]->to_contiguous_inplace(layout_checker); | |||
} | |||
} | |||
return OpDef::apply_on_physical_tensor( | |||
def, std::move(inputs), output_descs, validated); | |||
}; | |||
MGB_RECORD_EVENT(OpExecuteEvent, apply_id, {}, reason); | |||
// Begin profiling operator | |||
SmallVector<std::pair<CompNode, uint64_t>> kernels; | |||
if (profiling_device) { | |||
// Collecting devices | |||
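get_input_layout_constraint returns one optional callback per input; an empty callback means any layout is acceptable, and a callback returning false makes to_contiguous_inplace copy that input into a dense blob first. A sketch of such a checker (the same shape as the one registered for Reduce later in this patch):

VarNode::LayoutConstraintCallback require_contiguous = [](const TensorLayout& layout) {
    return layout.is_contiguous();  // false -> the input is copied to a contiguous blob
};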
@@ -1 +1,2 @@ | |||
#include "../../../src/core/impl/graph/cg_impl.h" | |||
#include "../../../src/core/impl/graph/var_node_mem_mgr.h" |
@@ -60,6 +60,11 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> OpDef::infer_output_attrs_falli | |||
return def.trait()->infer_output_attrs_fallible(def, inputs); | |||
} | |||
SmallVector<VarNode::LayoutConstraintCallback> OpDef::get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
return def.trait()->get_input_layout_constraint(def, inputs); | |||
} | |||
EncodedSubgraph OpDef::make_backward_graph( | |||
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs, | |||
const SmallVector<bool>& input_requires_grad, | |||
@@ -47,6 +47,10 @@ void OpMethFallbackByProxyGraph::impl( | |||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | |||
func.Base::operator=(proxy_graph_detail::infer_output_attrs_fallible); | |||
} | |||
void OpMethFallbackByProxyGraph::impl( | |||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint) { | |||
func.Base::operator=(proxy_graph_detail::get_input_layout_constraint); | |||
} | |||
void OpMethFallbackByProxyGraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | |||
func.Base::operator=(proxy_graph_detail::make_backward_graph); | |||
} | |||
@@ -63,6 +67,10 @@ void OpMethFallbackFromSubgraph::impl( | |||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible) { | |||
func.Base::operator=(subgraph_detail::infer_output_attrs_fallible); | |||
} | |||
void OpMethFallbackFromSubgraph::impl( | |||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint) { | |||
func.Base::operator=(subgraph_detail::get_input_layout_constraint); | |||
} | |||
void OpMethFallbackFromSubgraph::impl(GradMaker& func, op_meth_tag::GradMaker) { | |||
func.Base::operator=(subgraph_detail::make_backward_graph); | |||
} | |||
@@ -73,6 +73,9 @@ OpMethType(ApplyOnVarNode, | |||
OpMethType(InferOutputAttrsFallible, | |||
decltype(OpDef::infer_output_attrs_fallible)); | |||
OpMethType(GetInputLayoutConstraint, | |||
decltype(OpDef::get_input_layout_constraint)); | |||
OpMethType(GradMaker, | |||
decltype(OpDef::make_backward_graph)); | |||
@@ -119,6 +122,8 @@ struct OpMethFallbackByProxyGraph : OpMethImplBase { | |||
static void impl(ApplyOnPhysicalTensor& func, op_meth_tag::ApplyOnPhysicalTensor); | |||
static void impl( | |||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | |||
static void impl( | |||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint); | |||
static void impl(GradMaker& func, op_meth_tag::GradMaker); | |||
}; | |||
@@ -128,6 +133,8 @@ struct OpMethFallbackFromSubgraph : OpMethImplBase { | |||
static void impl(ApplyOnVarNode& func, op_meth_tag::ApplyOnVarNode); | |||
static void impl( | |||
InferOutputAttrsFallible& func, op_meth_tag::InferOutputAttrsFallible); | |||
static void impl( | |||
GetInputLayoutConstraint& func, op_meth_tag::GetInputLayoutConstraint); | |||
static void impl(GradMaker& func, op_meth_tag::GradMaker); | |||
}; | |||
@@ -179,6 +186,7 @@ struct OpTrait { | |||
ApplyOnDeviceTensorND apply_on_device_tensornd; | |||
ApplyOnVarNode apply_on_var_node; | |||
InferOutputAttrsFallible infer_output_attrs_fallible; | |||
GetInputLayoutConstraint get_input_layout_constraint; | |||
GradMaker make_backward_graph; | |||
Props props; | |||
HashFunc hash; | |||
@@ -199,6 +207,7 @@ struct OpTrait { | |||
cb(apply_on_device_tensornd) \ | |||
cb(apply_on_var_node) \ | |||
cb(infer_output_attrs_fallible) \ | |||
cb(get_input_layout_constraint) \ | |||
cb(make_backward_graph) \ | |||
cb(props) \ | |||
cb(hash) \ | |||
@@ -117,7 +117,7 @@ void InputCallback::scn_do_execute() { | |||
layout.init_contiguous_stride(); | |||
dev_tensor.reset(dev_tensor.storage(), layout); | |||
} | |||
output(0)->reset_dev_tensor_from_tensor(dev_tensor); | |||
output(0)->force_assign_dev_tensor_from_tensor(dev_tensor); | |||
} | |||
cg::OperatorNodeBase* InputCallback::shallow_copy( | |||
@@ -311,7 +311,7 @@ cg::OperatorNodeBase::NodeProp* MutableTensor::do_make_node_prop() const { | |||
} | |||
void MutableTensor::scn_do_execute() { | |||
output(0)->reset_dev_tensor_from_tensor(*m_dev_tensor); | |||
output(0)->force_assign_dev_tensor_from_tensor(*m_dev_tensor); | |||
} | |||
void MutableTensor::init_output_static_infer_desc() { | |||
@@ -83,28 +83,18 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | |||
SmallVector<TensorPtr> apply_on_physical_tensor( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||
auto& input = inputs[0]; | |||
TensorShape target_shape; | |||
if (validated) { | |||
target_shape = output_descs[0].layout; | |||
} else { | |||
cg::copy_tensor_value_to_shape( | |||
target_shape, inputs[1]->get_value().proxy_to_default_cpu()); | |||
} | |||
TensorPtr output = Tensor::make( | |||
TensorLayout(target_shape, input->dtype()), input->comp_node()); | |||
if (output->layout().is_empty()) { | |||
return {output}; | |||
} | |||
if (input->shape().eq_shape(output->shape())) { | |||
mgb_assert(input->layout().eq_layout(output->layout())); | |||
output->dev_tensor().copy_from_fixlayout(input->dev_tensor()); | |||
} else { | |||
TensorLayout input_layout = input->layout().broadcast(output->shape()); | |||
output->dev_tensor().copy_from_fixlayout( | |||
input->dev_tensor().sub(SubTensorSpec::make_from_layout(input_layout))); | |||
} | |||
return {output}; | |||
def.cast_final_safe<Broadcast>(); | |||
size_t nr_inp = inputs.size(); | |||
mgb_assert(nr_inp == 2, "Broadcast expects 2 inputs; got %lu actually", nr_inp); | |||
auto&& src = inputs[0]; | |||
auto&& tshp_nd = inputs[1]; | |||
auto slayout = src->layout(); | |||
TensorShape tshp; | |||
cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | |||
TensorLayout tlayout = slayout.broadcast(tshp); | |||
// memory forward | |||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||
} | |||
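Broadcast no longer materializes its output: the result aliases the source blob, and the broadcast axes get stride 0. A layout-level sketch:

TensorLayout src({1, 4}, dtype::Float32());
TensorLayout dst = src.broadcast({3, 4});
// dst: shape (3, 4), stride (0, 1); no memory is allocated or copied.
// Kernels that need dense inputs now rely on the layout-constraint pass in
// do_apply_op to copy such views on demand.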
OP_TRAIT_REG(Broadcast, Broadcast, opr::Broadcast) | |||
@@ -184,10 +174,6 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||
auto&& tshp_nd = inputs[1]; | |||
auto slayout = src->layout(); | |||
if (validated) { | |||
return {Tensor::make(src->blob(), 0, output_descs[0].layout)}; | |||
} | |||
TensorShape tshp; | |||
cg::copy_tensor_value_to_shape(tshp, tshp_nd->get_value().proxy_to_default_cpu()); | |||
if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | |||
@@ -195,13 +181,39 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||
tshp[op_def.axis] = 1; | |||
tshp[op_def.axis] = src->layout().total_nr_elems() / tshp.total_nr_elems(); | |||
} | |||
return {Tensor::make(src->blob(), 0, slayout.reshape(tshp))}; | |||
TensorLayout tlayout; | |||
mgb_assert(slayout.try_reshape(tlayout, tshp)); | |||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||
} | |||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
auto&& op_def = def.cast_final_safe<Reshape>(); | |||
SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||
layout_checker[0] = [&](const TensorLayout& layout) { | |||
TensorShape tshp; | |||
TensorLayout ret; | |||
cg::copy_tensor_value_to_shape( | |||
tshp, inputs[1]->get_value().proxy_to_default_cpu()); | |||
if (op_def.axis != opr::Reshape::Param::INVALID_AXIS) { | |||
mgb_assert(tshp[op_def.axis] == -1); | |||
tshp[op_def.axis] = 1; | |||
tshp[op_def.axis] = layout.total_nr_elems() / tshp.total_nr_elems(); | |||
} | |||
if (layout.try_reshape(ret, tshp)) { | |||
return true; | |||
} else { | |||
return false; | |||
} | |||
}; | |||
return layout_checker; | |||
} | |||
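The checker accepts any layout that try_reshape can express without a copy; a transposed view usually cannot, so it is made contiguous before the memory-forwarding path above runs. A sketch of a rejected layout:

TensorLayout t;
t.dtype = dtype::Float32();
t.ndim = 2;
t.shape[0] = 3; t.shape[1] = 2;
t.stride[0] = 1; t.stride[1] = 3;   // transposed view of a contiguous 2x3 buffer
TensorLayout out;
bool ok = t.try_reshape(out, {6});  // false -> to_contiguous_inplace copies first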
OP_TRAIT_REG(Reshape, Reshape) | |||
.apply_on_var_node(apply_on_var_node) | |||
.infer_output_attrs_fallible(infer_output_attrs_fallible) | |||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||
.get_input_layout_constraint(get_input_layout_constraint) | |||
.fallback(); | |||
} // namespace reshape | |||
@@ -220,12 +220,22 @@ cg::OperatorNodeBase* apply_inplace_add_on_var_node( | |||
SmallVector<TensorPtr> apply_inplace_add_on_physical_tensor( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||
mgb_assert( | |||
inputs[0]->blob().use_count() == 1 && inputs[0]->blob()->storage().unique(), | |||
"This inplace modification may change the elements of other tensors. " | |||
"Please set MEGENGINE_INPLACE_UPDATE to 0 to ensure the program runs " | |||
"correctly."); | |||
auto dest = inputs[0], delta = inputs[1], alpha = inputs[2], beta = inputs[3]; | |||
if (!(inputs[0]->blob().unique() && inputs[0]->blob()->storage().unique())) { | |||
mgb_log_warn( | |||
"This inplace modification may change the elements of other tensors. " | |||
"Fallback to non-inplace update."); | |||
DeviceTensorStorage storage; | |||
storage.reset(dest->comp_node(), dest->blob()->size(), dest->blob()->storage()); | |||
storage = storage.sub(dest->offset()); | |||
DeviceTensorND dv; | |||
dv.reset(storage, dest->layout()); | |||
DeviceTensorND dv_new; | |||
dv_new.copy_from(dv); | |||
dest = Tensor::make(dv_new); | |||
} | |||
auto tensor_to_scalar = [](const TensorPtr& tensor) -> float { | |||
return *tensor->get_value().ptr<float>(); | |||
}; | |||
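The uniqueness check exists because the memory-forwarding ops introduced in this patch can alias the destination blob; an in-place update would then silently mutate every alias. A sketch of the hazard (the layout name is illustrative):

auto alias = Tensor::make(dest->blob(), dest->offset(), aliased_layout);
// dest->blob().unique() is now false, so the code above copies dest into
// fresh storage and updates the copy instead.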
@@ -54,7 +54,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||
if (memory_forward_success(def, inputs)) { | |||
return {Tensor::make(inputs[0]->blob(), 0, inputs[0]->layout())}; | |||
return {Tensor::make( | |||
inputs[0]->blob(), inputs[0]->offset(), inputs[0]->layout())}; | |||
} | |||
return proxy_graph_detail::apply_on_physical_tensor( | |||
def, inputs, output_descs, validated); | |||
@@ -73,11 +74,21 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | |||
return {output_descs, validated}; | |||
} | |||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||
layout_checker[0] = [](const TensorLayout& layout) { | |||
return layout.is_contiguous(); | |||
}; | |||
return layout_checker; | |||
} | |||
OP_TRAIT_REG(Reduce, Reduce, opr::Reduce) | |||
.make_from_op_node(make_from_op_node) | |||
.apply_on_var_node(apply_on_var_node) | |||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||
.infer_output_attrs_fallible(infer_output_attrs_fallible) | |||
.get_input_layout_constraint(get_input_layout_constraint) | |||
.fallback(); | |||
} // namespace reduce | |||
} // namespace | |||
@@ -594,6 +594,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible<Dro | |||
return {dests, true}; | |||
} | |||
template <typename Op> | |||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
SmallVector<VarNode::LayoutConstraintCallback> layout_checker(inputs.size()); | |||
return layout_checker; | |||
} | |||
} // anonymous namespace | |||
Handle new_handle(CompNode comp_node, uint64_t seed) { | |||
@@ -622,6 +629,7 @@ CompNode get_rng_handle_compnode(Handle handle) { | |||
.apply_on_var_node(apply_on_var_node<NAME, Output>) \ | |||
.apply_on_physical_tensor(apply_on_physical_tensor<NAME>) \ | |||
.infer_output_attrs_fallible(infer_output_attrs_fallible<NAME>) \ | |||
.get_input_layout_constraint(get_input_layout_constraint<NAME>) \ | |||
.fallback(); \ | |||
} | |||
@@ -60,9 +60,55 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||
return opr::Dimshuffle::make(inputs[0], ds.pattern, 0UL, config); | |||
} | |||
SmallVector<TensorPtr> apply_on_physical_tensor( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||
auto&& ds = static_cast<const Dimshuffle&>(def); | |||
mgb_assert( | |||
ds.pattern.size() <= TensorShape::MAX_NDIM, | |||
"Dimshuffle pattern exceeds max length of %zd", TensorShape::MAX_NDIM); | |||
size_t nr_inp = inputs.size(); | |||
mgb_assert(nr_inp == 1, "Dimshuffle expects 1 inputs; got %lu actually", nr_inp); | |||
auto&& src = inputs[0]; | |||
auto inp_layout = src->layout(); | |||
size_t pattern_ndim = *std::max_element(ds.pattern.begin(), ds.pattern.end()) + 1; | |||
mgb_assert( | |||
inp_layout.ndim == pattern_ndim, | |||
"input ndim mismatch for Dimshuffle: expect=%zd actual=%zd", pattern_ndim, | |||
inp_layout.ndim); | |||
TensorLayout out_layout{inp_layout.dtype}; | |||
out_layout.ndim = ds.pattern.size(); | |||
size_t idx = 0; | |||
bool input_used[TensorLayout::MAX_NDIM] = {0}; | |||
for (auto i : ds.pattern) { | |||
if (i < 0) { | |||
out_layout.shape[idx] = 1; | |||
out_layout.stride[idx] = 1; | |||
} else { | |||
input_used[i] = true; | |||
out_layout.shape[idx] = inp_layout.shape[i]; | |||
out_layout.stride[idx] = inp_layout.stride[i]; | |||
} | |||
++idx; | |||
} | |||
if (out_layout.is_contiguous()) { | |||
out_layout.init_contiguous_stride(); | |||
} | |||
for (size_t i = 0; i < pattern_ndim; ++i) { | |||
mgb_assert( | |||
input_used[i] || inp_layout.shape[i] == 1, | |||
"non-1 dim discarded in Dimshuffle: ishp=%s dim=%zd", | |||
inp_layout.megdnn::TensorShape::to_string().c_str(), i); | |||
} | |||
// memory forward | |||
return {Tensor::make(src->blob(), src->offset(), out_layout)}; | |||
} | |||
OP_TRAIT_REG(Dimshuffle, Dimshuffle, opr::Dimshuffle) | |||
.make_from_op_node(make_from_op_node) | |||
.apply_on_var_node(apply_on_var_node) | |||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||
.fallback(); | |||
} // namespace dimshuffle | |||
} // namespace | |||
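The Dimshuffle fast path above is a pure view operation: the output layout is built by permuting the input's shape/stride pairs, and the result reuses the same blob and offset. A standalone sketch of that permutation, using simplified stand-in types rather than TensorLayout:

    #include <cstddef>
    #include <vector>

    struct SimpleLayout {                    // stand-in for TensorLayout (shape + stride only)
        std::vector<std::size_t> shape;
        std::vector<std::ptrdiff_t> stride;
    };

    // Output axis k takes shape/stride from input axis pattern[k]; a negative
    // entry inserts a broadcast-style axis of extent 1. Storage is untouched.
    SimpleLayout dimshuffle_view(const SimpleLayout& in, const std::vector<int>& pattern) {
        SimpleLayout out;
        for (int axis : pattern) {
            if (axis < 0) {
                out.shape.push_back(1);
                out.stride.push_back(1);
            } else {
                out.shape.push_back(in.shape[axis]);
                out.stride.push_back(in.stride[axis]);
            }
        }
        return out;
    }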
@@ -80,7 +126,25 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||
return opr::AxisAddRemove::make(inputs[0], param, config); | |||
} | |||
OP_TRAIT_REG(AddAxis, AddAxis).apply_on_var_node(apply_on_var_node).fallback(); | |||
SmallVector<TensorPtr> apply_on_physical_tensor( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||
auto&& op_def = def.cast_final_safe<AddAxis>(); | |||
size_t nr_inp = inputs.size(); | |||
mgb_assert(nr_inp == 1, "AddAxis expects 1 inputs; got %lu actually", nr_inp); | |||
auto&& src = inputs[0]; | |||
auto tlayout = src->layout(); | |||
for (auto&& i : op_def.axis) { | |||
tlayout.add_axis_cont_inplace(i); | |||
} | |||
// memory forward | |||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||
} | |||
OP_TRAIT_REG(AddAxis, AddAxis) | |||
.apply_on_var_node(apply_on_var_node) | |||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||
.fallback(); | |||
} // namespace add_axis | |||
} // namespace | |||
@@ -97,7 +161,36 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { | |||
return opr::AxisAddRemove::make(inputs[0], param, config); | |||
} | |||
OP_TRAIT_REG(RemoveAxis, RemoveAxis).apply_on_var_node(apply_on_var_node).fallback(); | |||
SmallVector<TensorPtr> apply_on_physical_tensor( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs, | |||
SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) { | |||
auto&& op_def = def.cast_final_safe<RemoveAxis>(); | |||
size_t nr_inp = inputs.size(); | |||
mgb_assert(nr_inp == 1, "RemoveAxis expects 1 inputs; got %lu actually", nr_inp); | |||
auto&& src = inputs[0]; | |||
auto tlayout = src->layout(); | |||
for (auto&& i : op_def.axis) { | |||
if (tlayout.ndim == 1) { | |||
mgb_assert( | |||
tlayout.shape[0] == 1 && i == 0, | |||
"can not remove axis %u from tensor of shape=%s", i, | |||
tlayout.megdnn::TensorShape::to_string().c_str()); | |||
} else { | |||
mgb_assert( | |||
i < tlayout.ndim && tlayout.shape[i] == 1, | |||
"can not remove axis %u from tensor of shape=%s", i, | |||
tlayout.megdnn::TensorShape::to_string().c_str()); | |||
tlayout.remove_axis_inplace(i); | |||
} | |||
} | |||
// memory forward | |||
return {Tensor::make(src->blob(), src->offset(), tlayout)}; | |||
} | |||
OP_TRAIT_REG(RemoveAxis, RemoveAxis) | |||
.apply_on_var_node(apply_on_var_node) | |||
.apply_on_physical_tensor(apply_on_physical_tensor) | |||
.fallback(); | |||
} // namespace remove_axis | |||
} // namespace | |||
@@ -411,7 +411,7 @@ struct ComputingGraphHolder { | |||
executable->wait(); | |||
size_t nr_inputs = inputs.size(); | |||
for (size_t i = 0; i < nr_inputs; ++i) { | |||
auto input_dev_tensor = input_tensors[i]->dev_tensor(); | |||
auto input_dev_tensor = input_tensors[i]->dev_tensor(false); | |||
inputs[i].device_value->reset( | |||
input_dev_tensor.storage(), input_dev_tensor.layout()); | |||
if (inputs[i].host_value) { | |||
@@ -95,7 +95,13 @@ const Blob::RawStorage& Blob::storage() { | |||
Tensor::Tensor( | |||
BlobPtr blob, const TensorLayout& layout, size_t offset, const HostTensorND& hv) | |||
: m_layout(layout), m_blob(std::move(blob)), m_offset(offset), m_value(hv) {} | |||
: m_cn(blob->comp_node()), | |||
m_shape(layout), | |||
m_dtype(layout.dtype), | |||
m_layout(layout), | |||
m_blob(std::move(blob)), | |||
m_offset(offset), | |||
m_value(hv) {} | |||
Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | |||
constexpr int size_threshold = TensorShape::MAX_NDIM; | |||
@@ -107,7 +113,12 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | |||
MGB_RECORD_EVENT( | |||
profiler::HostToDeviceEvent, hv.layout(), hv.comp_node(), hv.raw_ptr(), | |||
dev_tensor().raw_ptr()); | |||
dev_tensor().copy_from_fixlayout(hv); | |||
DeviceTensorStorage storage; | |||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||
storage = storage.sub(m_offset); | |||
DeviceTensorND dv; | |||
dv.reset(storage, m_layout); | |||
dv.copy_from_fixlayout(hv); | |||
// even though hv is saved in m_value, Tensor itself could be | |||
// released before copy completes | |||
MGB_RECORD_EVENT( | |||
@@ -117,25 +128,36 @@ Tensor::Tensor(const HostTensorND& hv) : Tensor(hv.layout(), hv.comp_node()) { | |||
} | |||
} | |||
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) { | |||
Tensor::Tensor(const DeviceTensorND& dv, const HostTensorND& hv) | |||
: m_offset(dv.storage().offset()), | |||
m_cn(dv.comp_node()), | |||
m_shape(dv.layout()), | |||
m_dtype(dv.layout().dtype), | |||
m_blob(Blob::make(dv.storage())), | |||
m_layout(dv.layout()) { | |||
if (!hv.empty()) { | |||
mgb_assert(dv.comp_node() == hv.comp_node()); | |||
mgb_assert(dv.dtype() == hv.dtype()); | |||
mgb_assert(dv.shape().eq_shape(hv.shape())); | |||
m_value = hv; | |||
} | |||
m_layout = dv.layout(); | |||
m_blob = Blob::make(dv.storage()); | |||
m_offset = dv.storage().offset(); | |||
} | |||
Tensor::Tensor(const TensorLayout& layout, const CompNode& cn) | |||
: m_layout{layout}, | |||
m_blob{Blob::make(cn, layout.span().dist_byte())}, | |||
m_offset{0} {} | |||
m_offset{0}, | |||
m_cn(cn), | |||
m_shape(layout), | |||
m_dtype(layout.dtype) {} | |||
Tensor::Tensor(const BlobPtr blob, const size_t offset, const TensorLayout& layout) | |||
: m_layout{layout}, m_blob{blob}, m_offset{offset} {} | |||
: m_layout{layout}, | |||
m_blob{blob}, | |||
m_offset{offset}, | |||
m_cn(blob->comp_node()), | |||
m_shape(layout), | |||
m_dtype(layout.dtype) {} | |||
TensorPtr Tensor::make(const HostTensorND& hv) { | |||
auto&& blob = MultiCNConstTensorCache::inst().lookup(hv); | |||
@@ -145,10 +167,45 @@ TensorPtr Tensor::make(const HostTensorND& hv) { | |||
return std::make_shared<Tensor>(hv); | |||
} | |||
DeviceTensorND Tensor::dev_tensor() { | |||
void Tensor::to_contiguous_inplace(VarNode::LayoutConstraintCallback& layout_checker) { | |||
MGB_LOCK_GUARD(m_blob_mtx); | |||
if (!m_layout.is_empty() && !layout_checker(m_layout)) { | |||
DeviceTensorStorage storage; | |||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||
storage = storage.sub(m_offset); | |||
DeviceTensorND dv; | |||
dv.reset(storage, m_layout); | |||
DeviceTensorND dv_contig; | |||
dv_contig.copy_from(dv); | |||
m_layout = dv_contig.layout(); | |||
std::atomic_store(&m_blob, Blob::make(dv_contig.storage())); | |||
mgb_assert(m_layout.is_contiguous()); | |||
m_offset = 0; | |||
} | |||
} | |||
void Tensor::to_contiguous_inplace() { | |||
static VarNode::LayoutConstraintCallback default_cb = | |||
[](const TensorLayout& layout) { return layout.is_contiguous(); }; | |||
to_contiguous_inplace(default_cb); | |||
} | |||
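A possible call site for the checker-taking overload (illustrative only, assuming a TensorPtr named tensor and the surrounding megbrain headers): an operator that only needs monotone layouts can pass a weaker predicate, so the copy is skipped whenever the current layout already qualifies.

    VarNode::LayoutConstraintCallback monotone_checker = [](const TensorLayout& layout) {
        return layout.is_abs_monotonous_allow_brdcst();
    };
    tensor->to_contiguous_inplace(monotone_checker);  // no-op if the layout already passes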
void Tensor::assign_from_dev_tensor(DeviceTensorND dv) { | |||
MGB_LOCK_GUARD(m_blob_mtx); | |||
std::atomic_store(&m_blob, Blob::make(dv.storage())); | |||
m_offset = dv.storage().offset(); | |||
m_layout = dv.layout(); | |||
} | |||
DeviceTensorND Tensor::dev_tensor(bool contiguous) { | |||
mgb_assert(m_blob, "uninitialized tensor."); | |||
if (contiguous) { | |||
to_contiguous_inplace(); | |||
} | |||
MGB_LOCK_GUARD(m_blob_mtx); | |||
DeviceTensorStorage storage; | |||
storage.reset(m_blob->comp_node(), m_blob->size(), m_blob->storage()); | |||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||
storage = storage.sub(m_offset); | |||
DeviceTensorND ret; | |||
ret.reset(storage, m_layout); | |||
@@ -156,16 +213,22 @@ DeviceTensorND Tensor::dev_tensor() { | |||
} | |||
void Tensor::fetch_value() { | |||
MGB_LOCK_GUARD(m_mtx); | |||
MGB_LOCK_GUARD(m_blob_mtx); | |||
MGB_LOCK_GUARD(m_value_mtx); | |||
if (m_value.empty()) { | |||
m_value.copy_from(dev_tensor()); | |||
DeviceTensorStorage storage; | |||
storage.reset(m_cn, m_blob->size(), m_blob->storage()); | |||
storage = storage.sub(m_offset); | |||
DeviceTensorND dv; | |||
dv.reset(storage, m_layout); | |||
m_value.copy_from(dv); | |||
m_value_ready.reset(EventPool::without_timer().alloc(comp_node())); | |||
m_value_ready->record(); | |||
} | |||
} | |||
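Typical host-side usage of this value-caching path, sketched under the assumption that the caller polls for completion rather than blocking (tensor is an illustrative name):

    if (!tensor->value_fetched())
        tensor->fetch_value();                        // queues the D2H copy and records a ready event
    const HostTensorND* hv = tensor->try_get_value(); // non-null only once the copy has finished
    if (hv) {
        // safe to read host data here
    }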
bool Tensor::value_fetched() { | |||
MGB_LOCK_GUARD(m_mtx); | |||
MGB_LOCK_GUARD(m_value_mtx); | |||
return m_value.layout().ndim != 0; | |||
} | |||
@@ -178,7 +241,7 @@ const HostTensorND& Tensor::get_value() { | |||
} | |||
const HostTensorND* Tensor::try_get_value() { | |||
MGB_LOCK_GUARD(m_mtx); | |||
MGB_LOCK_GUARD(m_value_mtx); | |||
if (!m_value.empty() && (!m_value_ready || m_value_ready->finished())) { | |||
return &m_value; | |||
} | |||
@@ -193,7 +256,7 @@ TensorPtr Tensor::make_scalar(DTypeScalar value, CompNode cn) { | |||
} | |||
TensorPtr Tensor::sub(size_t offset, TensorShape shape) { | |||
TensorLayout layout(shape, m_layout.dtype); | |||
TensorLayout layout(shape, m_dtype); | |||
return Tensor::make(m_blob, offset + m_offset, layout); | |||
} | |||
@@ -73,7 +73,7 @@ public: | |||
static SymbolVar make(ComputingGraph& graph, Tensor& tensor) { | |||
auto opr = graph.insert_opr(std::make_unique<InputPlaceholder>(graph, &tensor)); | |||
auto var = opr->output(0); | |||
auto&& dev_tensor = tensor.dev_tensor(); | |||
auto&& dev_tensor = tensor.dev_tensor(false); | |||
var->m_comp_node = dev_tensor.comp_node(); | |||
var->m_shape = dev_tensor.shape(); | |||
if (dev_tensor.empty()) { | |||
@@ -81,10 +81,7 @@ public: | |||
layout.init_contiguous_stride(); | |||
dev_tensor.reset(dev_tensor.storage(), layout); | |||
} | |||
var->m_dev_tensor = dev_tensor; | |||
var->m_mem_plan.reset_from_owner_var() | |||
.chunk() | |||
.mem_alloc_status.set_from_owner_var(); | |||
var->force_assign_dev_tensor_from_tensor(dev_tensor); | |||
return var; | |||
} | |||
@@ -314,15 +314,11 @@ public: | |||
size_t idx = 0; | |||
for (auto&& input : opr_inputs) { | |||
mgb_assert(input->owner_opr()->same_type<InputPlaceholder>()); | |||
input->m_dev_tensor.storage({}); | |||
auto&& dev_tensor = inputs[input_remap[idx]]->dev_tensor(); | |||
auto&& dev_tensor = inputs[input_remap[idx]]->dev_tensor(false); | |||
auto&& layout = dev_tensor.layout(); | |||
input->shape(dev_tensor.shape()); | |||
auto&& chk = input->m_mem_plan.reset_from_owner_var().chunk(); | |||
input->m_dev_tensor.reset(dev_tensor.storage(), layout); | |||
input->m_mem_plan.layout(layout); | |||
chk.mem_alloc_status.set_from_owner_var(); | |||
input->force_assign_dev_tensor_from_tensor(dev_tensor); | |||
mgb_assert(input->comp_node() == dev_tensor.comp_node()); | |||
mgb_assert(input->shape().eq_shape(layout)); | |||
@@ -335,9 +331,14 @@ public: | |||
mgb_assert(m_opr->usable_output().size() == outputs.size()); | |||
::mgb::opr::intl::WorkspaceLimitHook::set_impl( | |||
m_opr->owner_graph(), get_workspace_limit); | |||
size_t j = 0; | |||
for (auto&& var : m_opr->output()) { | |||
auto&& chk = var->m_mem_plan.reset_from_owner_var().chunk(); | |||
chk.mem_alloc_status.set_from_owner_var(); | |||
} | |||
m_opr->mem_plan_fwd_in2out_readonly(); | |||
size_t j = 0; | |||
for (auto&& var : m_opr->output()) { | |||
if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) { | |||
TensorLayout layout{var->shape(), var->dtype(), var->format()}; | |||
var->m_dev_tensor = BlobManager::inst()->alloc_workspace_with_defrag( | |||
@@ -349,18 +350,16 @@ public: | |||
mgb_assert(var->comp_node() == tensor->comp_node()); | |||
mgb_assert(var->shape().eq_shape(layout)); | |||
mgb_assert(var->dtype() == layout.dtype); | |||
var->assign_dev_tensor_from_tensor(tensor->dev_tensor()); | |||
if (var->m_mem_plan.chunk().owner_var != var) { | |||
tensor->assign_from_dev_tensor( | |||
var->m_dev_tensor); // memory forwarding | |||
} else { | |||
var->assign_dev_tensor_from_tensor(tensor->dev_tensor()); | |||
} | |||
++j; | |||
} | |||
chk.mem_alloc_status.set_from_owner_var(); | |||
} | |||
mgb_assert(j == outputs.size()); | |||
        // Memory forwarding is bypassed in megbrain when the graph option | |||
        // imperative_proxy_graph is on; here we call mem_plan_fwd_in2out_readonly | |||
        // to initialize the internal state of some oprs (e.g. Subtensor) | |||
        // TODO: implement memory forwarding | |||
m_opr->mem_plan_fwd_in2out_readonly(); | |||
{ | |||
            // some oprs (e.g. Reduce) rely on on_mem_status_changed to set | |||
            // input/output tensors correctly, since we bypass var_node_mem_mgr | |||
@@ -840,7 +839,7 @@ public: | |||
Tensor::make(output_descs[i].layout, output_descs[i].comp_node); | |||
} | |||
auto raw_outputs = to_raw_ptr_array(outputs); | |||
auto raw_outputs = to_raw_ptr_array(outputs, false); | |||
CompNode::UnorderedSet used_cns; | |||
for (auto&& out : raw_outputs) { | |||
auto cn = out->comp_node(); | |||
@@ -9,8 +9,12 @@ | |||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#include "../mgb_cg_impl.h" | |||
#include "./mini_graph.h" | |||
#include "megbrain/opr/io.h" | |||
using LayoutConstraintLevel = mgb::cg::VarNodeMemManager::LayoutConstraintLevel; | |||
using LayoutConstraintCallback = mgb::VarNode::LayoutConstraintCallback; | |||
namespace mgb::imperative::proxy_graph { | |||
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder); | |||
@@ -34,4 +38,81 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||
return ret; | |||
} | |||
std::unordered_map<size_t, SmallVector<LayoutConstraintCallback>> | |||
input_layout_constraints_cache; | |||
SmallVector<LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
auto get_input_layout_constraint_hash_key = | |||
[](const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
XXHash state; | |||
size_t length = 0, data[1 + inputs.size()]; | |||
data[length++] = def.hash(); | |||
for (auto&& i : inputs) { | |||
data[length++] = mgb::hash(i->comp_node()); | |||
} | |||
state.update(data, length * sizeof(size_t)); | |||
return state.digest(); | |||
}; | |||
auto hash_key = get_input_layout_constraint_hash_key(def, inputs); | |||
auto&& iter = input_layout_constraints_cache.find(hash_key); | |||
if (iter != input_layout_constraints_cache.end()) { | |||
return iter->second; | |||
} | |||
static cg::ComputingGraphImpl* graph = | |||
imperative::ResourceManager::create_global<cg::ComputingGraphImpl>(); | |||
VarNodeArray vinputs(inputs.size()); | |||
for (size_t i = 0; i < inputs.size(); ++i) { | |||
OperatorNodeConfig config; | |||
auto&& layout = inputs[i]->layout(); | |||
layout.init_contiguous_stride(); | |||
vinputs[i] = graph->insert_opr(std::make_unique<mgb::opr::SharedDeviceTensor>( | |||
*graph, | |||
std::make_shared<DeviceTensorND>( | |||
inputs[i]->comp_node(), layout), | |||
false, config)) | |||
->output(0); | |||
} | |||
auto&& opr = OpDef::apply_on_var_node(def, vinputs)[0]->owner_opr(); | |||
opr->add_input_layout_constraint(); | |||
SmallVector<LayoutConstraintCallback> res(inputs.size()); | |||
auto& mem_mgr = graph->var_node_mem_manager(); | |||
for (size_t i = 0; i < vinputs.size(); ++i) { | |||
auto& trait = mem_mgr.get_var_node_mem_trait(vinputs[i]); | |||
switch (trait.layout_constraint.level) { | |||
case LayoutConstraintLevel::CONTIG: | |||
res[i] = [](const TensorLayout& layout) { | |||
return layout.is_contiguous(); | |||
}; | |||
break; | |||
case LayoutConstraintLevel::MONOTONE: | |||
res[i] = [&trait](const TensorLayout& layout) { | |||
if (!layout.is_abs_monotonous_allow_brdcst()) { | |||
return false; | |||
} | |||
for (auto&& i : trait.layout_constraint.custom) | |||
if (!i(layout)) | |||
return false; | |||
return true; | |||
}; | |||
break; | |||
case LayoutConstraintLevel::NONE: | |||
if (!trait.layout_constraint.custom.empty()) { | |||
res[i] = [&trait](const TensorLayout& layout) { | |||
for (auto&& i : trait.layout_constraint.custom) | |||
if (!i(layout)) | |||
return false; | |||
return true; | |||
}; | |||
} | |||
break; | |||
default: | |||
mgb_throw(InternalError, "invalid layout_constraint_level"); | |||
} | |||
} | |||
input_layout_constraints_cache.emplace(hash_key, res); | |||
return res; | |||
} | |||
} // namespace mgb::imperative::proxy_graph_detail |
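To make the constraint levels concrete, a small illustration of what the CONTIG-level callback accepts and rejects (assumes the megdnn/megbrain headers and helpers such as TensorLayout::dimshuffle are available as usual; not part of the patch):

    TensorLayout base({4, 8}, dtype::Float32());         // contiguous layout
    TensorLayout swapped = base.dimshuffle({1, 0});       // transposed view, non-contiguous
    VarNode::LayoutConstraintCallback contig = [](const TensorLayout& layout) {
        return layout.is_contiguous();
    };
    mgb_assert(contig(base));
    mgb_assert(!contig(swapped));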
@@ -17,6 +17,8 @@ | |||
#include "./op_trait.h" | |||
using LayoutConstraintCallback = mgb::VarNode::LayoutConstraintCallback; | |||
namespace mgb { | |||
namespace imperative { | |||
namespace subgraph_detail { | |||
@@ -73,6 +75,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||
const std::shared_ptr<OpDef>& op, | |||
const SmallVector<TensorPtr>& inputs, | |||
size_t nr_outputs) { | |||
auto&& constraints = OpDef::get_input_layout_constraint(*op, inputs); | |||
for (size_t idx = 0; idx < inputs.size(); ++idx) { | |||
auto&& layout_checker = constraints[idx]; | |||
if (layout_checker) { | |||
inputs[idx]->to_contiguous_inplace(layout_checker); | |||
} | |||
} | |||
    // do not use inferred output_desc in subgraph | |||
return OpDef::apply_on_physical_tensor(*op, inputs, output_descs, false); | |||
}; | |||
@@ -81,6 +90,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||
return outputs; | |||
} | |||
SmallVector<LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
SmallVector<LayoutConstraintCallback> res(inputs.size()); | |||
return res; | |||
} | |||
static EncodedSubgraph make_backward_graph_from_forward( | |||
const SmallVector<LogicalTensorDesc>& inputs, | |||
const SmallVector<bool>& input_requires_grad, | |||
@@ -78,6 +78,9 @@ public: | |||
static EncodedSubgraph make_forward_graph( | |||
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs); | |||
static SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||
const OpTrait* trait() const; | |||
std::string to_string() const; | |||
@@ -14,6 +14,7 @@ | |||
#include <memory> | |||
#include <mutex> | |||
#include "megbrain/graph.h" | |||
#include "megbrain/imperative/resource_manager.h" | |||
#include "megbrain/tensor.h" | |||
@@ -90,18 +91,24 @@ public: | |||
CompNode comp_node() const { | |||
mgb_assert(m_blob, "uninitialized tensor."); | |||
return m_blob->comp_node(); | |||
return m_cn; | |||
} | |||
DType dtype() const { return m_layout.dtype; } | |||
DType dtype() const { return m_dtype; } | |||
TensorLayout layout() const { return m_layout; } | |||
const TensorShape& shape() const { return m_layout; } | |||
const TensorShape& shape() const { return m_shape; } | |||
size_t offset() const { return m_offset; } | |||
DeviceTensorND dev_tensor(); | |||
void to_contiguous_inplace(VarNode::LayoutConstraintCallback&); | |||
void to_contiguous_inplace(); | |||
DeviceTensorND dev_tensor(bool contiguous = true); | |||
void assign_from_dev_tensor(DeviceTensorND); | |||
static TensorPtr make_scalar(DTypeScalar value, CompNode cn); | |||
@@ -110,7 +117,7 @@ public: | |||
return make_scalar(value, m_blob->comp_node()); | |||
} | |||
BlobPtr& blob() { return m_blob; } | |||
BlobPtr blob() { return m_blob; } | |||
void fetch_value(); | |||
bool value_fetched(); | |||
@@ -131,10 +138,16 @@ public: | |||
static void static_initialize(); | |||
private: | |||
TensorLayout m_layout; | |||
BlobPtr m_blob; | |||
size_t m_offset; | |||
std::mutex m_mtx; | |||
const CompNode m_cn; | |||
const TensorShape m_shape; | |||
const DType m_dtype; | |||
std::mutex m_blob_mtx; | |||
BlobPtr m_blob; | |||
TensorLayout m_layout; | |||
std::mutex m_value_mtx; | |||
HostTensorND m_value; | |||
EventPtr m_value_ready = nullptr; | |||
}; | |||
@@ -33,6 +33,9 @@ EncodedSubgraph make_backward_graph( | |||
const SmallVector<bool>& input_requires_grad, | |||
const SmallVector<bool>& output_has_grad); | |||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||
} // namespace proxy_graph_detail | |||
} // namespace imperative | |||
} // namespace mgb | |||
@@ -36,6 +36,9 @@ EncodedSubgraph make_backward_graph( | |||
const SmallVector<bool>& input_requires_grad, | |||
const SmallVector<bool>& output_has_grad); | |||
SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs); | |||
} // namespace subgraph_detail | |||
} // namespace imperative | |||
} // namespace mgb |
@@ -322,7 +322,7 @@ void ComputingGraphImpl::free_varnode_storage(void* ptr) { | |||
m_var_node_pool.free_raw(ptr); | |||
}; | |||
OperatorNodeBase* ComputingGraphImpl::insert_opr( | |||
MGE_WIN_DECLSPEC_FUC OperatorNodeBase* ComputingGraphImpl::insert_opr( | |||
std::unique_ptr<OperatorNodeBase> opr_uniqp) { | |||
auto opr = opr_uniqp.get(); | |||
@@ -148,8 +148,8 @@ class ComputingGraphImpl final : public ComputingGraph { | |||
public: | |||
class ComputingSequence; | |||
ComputingGraphImpl(); | |||
~ComputingGraphImpl(); | |||
MGE_WIN_DECLSPEC_FUC ComputingGraphImpl(); | |||
MGE_WIN_DECLSPEC_FUC ~ComputingGraphImpl(); | |||
template <typename T> | |||
static ComputingGraphImpl* downcast(T* ptr) = delete; | |||
@@ -166,7 +166,8 @@ public: | |||
SmallVector<std::unique_ptr<AsyncExecutable>> compile_multi_part( | |||
const SmallVector<OutputSpec>& out_specs) override; | |||
OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr) override; | |||
MGE_WIN_DECLSPEC_FUC OperatorNodeBase* insert_opr( | |||
std::unique_ptr<OperatorNodeBase> opr) override; | |||
void* alloc_varnode_storage() override; | |||
@@ -93,6 +93,23 @@ MemAllocPlan& MemAllocPlan::assign_for_forward( | |||
return *this; | |||
} | |||
MemAllocPlan& MemAllocPlan::force_assign_for_forward( | |||
const MemAllocPlan& src, const SubTensorSpec& sub) { | |||
mgb_assert(valid() && src.valid() && m_layout.eq_shape(sub.layout())); | |||
++(m_chunk = src.m_chunk)->m_refcnt; | |||
m_layout = sub.layout(); | |||
// make layout strong-contig | |||
for (int i = static_cast<int>(m_layout.ndim) - 1; i >= 0; --i) { | |||
if (m_layout.shape[i] == 1) { | |||
m_layout.stride[i] = i + 1 < static_cast<int>(m_layout.ndim) | |||
? m_layout.stride[i + 1] * m_layout.shape[i + 1] | |||
: 1; | |||
} | |||
} | |||
m_layout.dtype = dtype(); | |||
return *this; | |||
} | |||
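The loop above only rewrites the strides of size-1 axes so the forwarded layout is "strong-contig". A standalone sketch of the same normalization on raw arrays, with a worked example in the comments (illustrative names, not part of the patch):

    #include <cstddef>

    // For every axis of extent 1 (scanning from the innermost axis outwards),
    // assign the stride it would have in a fully contiguous layout.
    // Example: shape (1, 4) with stride (0, 1) becomes stride (4, 1).
    void make_strong_contig(std::size_t ndim, const std::size_t* shape, std::ptrdiff_t* stride) {
        for (int i = static_cast<int>(ndim) - 1; i >= 0; --i) {
            if (shape[i] == 1) {
                stride[i] = i + 1 < static_cast<int>(ndim)
                                  ? stride[i + 1] * static_cast<std::ptrdiff_t>(shape[i + 1])
                                  : 1;
            }
        }
    }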
MemAllocPlan& MemAllocPlan::reset_from_owner_var() { | |||
auto owner_var = m_chunk_storage.owner_var; | |||
m_layout.dtype = dtype(); | |||
@@ -223,7 +240,12 @@ VarNode& VarNode::format(TensorFormat format) { | |||
bool VarNode::set_fwd_in2out_readonly(VarNode* input, const SubTensorSpec& sub) { | |||
if (owner_graph()->options().imperative_proxy_graph) { | |||
return false; | |||
if (input->comp_node() != comp_node()) { | |||
return false; | |||
} | |||
m_mem_plan.force_assign_for_forward(input->m_mem_plan, sub); | |||
m_dev_tensor = input->dev_tensor().sub(sub); | |||
return true; | |||
} | |||
return ComputingGraphImpl::downcast(owner_graph()) | |||
->var_node_mem_manager() | |||
@@ -361,6 +383,13 @@ VarNode& VarNode::reset_dev_tensor_from_tensor(const DeviceTensorND& value) { | |||
return *this; | |||
} | |||
void VarNode::force_assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | |||
m_dev_tensor = value; | |||
shape(value.shape()); | |||
m_mem_plan.reset_from_owner_var().chunk().mem_alloc_status.set_from_owner_var(); | |||
m_mem_plan.layout(value.layout()); | |||
} | |||
void VarNode::assign_dev_tensor_from_tensor(const DeviceTensorND& value) { | |||
mgb_assert( | |||
(value.layout().is_contiguous() || value.empty()) && | |||
@@ -475,7 +475,7 @@ DEF(CompNode node, const TensorShape& shape, DType dtype, TensorFormat format) | |||
DEF(CompNode node, const TensorLayout& layout) | |||
: TensorND(node, layout, layout.dtype, layout.format) { | |||
mgb_assert( | |||
layout.is_contiguous(), | |||
layout.is_contiguous() || layout.is_empty(), | |||
"non-contiguous layout used for initializing a tensor: %s", | |||
layout.to_string().c_str()); | |||
} | |||
@@ -241,7 +241,8 @@ public: | |||
* \return the node in the graph (maybe another node due to | |||
* deduplication) | |||
*/ | |||
virtual OperatorNodeBase* insert_opr(std::unique_ptr<OperatorNodeBase> opr) = 0; | |||
MGE_WIN_DECLSPEC_FUC virtual OperatorNodeBase* insert_opr( | |||
std::unique_ptr<OperatorNodeBase> opr) = 0; | |||
/*! | |||
* \brief used by OperatorNodeBase to allocate its outputs | |||
@@ -194,6 +194,10 @@ public: | |||
MGE_WIN_DECLSPEC_FUC MemAllocPlan& assign_for_forward( | |||
const MemAllocPlan& src, const SubTensorSpec& sub); | |||
//! force assign for readonly forward | |||
MGE_WIN_DECLSPEC_FUC MemAllocPlan& force_assign_for_forward( | |||
const MemAllocPlan& src, const SubTensorSpec& sub); | |||
/*! | |||
* \brief next readonly-forward reader of this MemAllocPlan | |||
* | |||
@@ -509,6 +513,9 @@ public: | |||
//! NO_SYS_MEM_ALLOC can be modified. | |||
MGE_WIN_DECLSPEC_FUC bool is_graph_dest_varnode(); | |||
MGE_WIN_DECLSPEC_FUC void force_assign_dev_tensor_from_tensor( | |||
const DeviceTensorND& value); | |||
private: | |||
//! whether its memory should be allocated by mgb system during graph | |||
//! execution; initialized in VarNodeMemManager::reset_opr_seq() | |||
@@ -24,7 +24,7 @@ namespace intl { | |||
* \brief base class for IO nodes between device and host | |||
*/ | |||
class HostIONodeBase : public cg::SingleCNOperatorNodeBase { | |||
void init_output_static_infer_desc() override final; | |||
MGE_WIN_DECLSPEC_FUC void init_output_static_infer_desc() override final; | |||
protected: | |||
using cg::SingleCNOperatorNodeBase::SingleCNOperatorNodeBase; | |||
@@ -32,9 +32,10 @@ protected: | |||
/*! | |||
* \brief src_type for static shape and value infer | |||
*/ | |||
virtual cg::static_infer::SourceType static_infer_src_type() const; | |||
MGE_WIN_DECLSPEC_FUC virtual cg::static_infer::SourceType static_infer_src_type() | |||
const; | |||
virtual const TensorShape& get_output_shape() = 0; | |||
MGE_WIN_DECLSPEC_FUC virtual const TensorShape& get_output_shape() = 0; | |||
/*! | |||
* \brief fill value in *dest* for static inference | |||
@@ -52,10 +53,10 @@ protected: | |||
class DeviceTensorHolder : public HostIONodeBase { | |||
class DevValueExecDep; | |||
void init_output_format() override; | |||
void init_output_mem_plan(bool dynamic) override final; | |||
void scn_do_execute() override final; | |||
void record_execute_deps(ExecDependencyArray& deps) override; | |||
MGE_WIN_DECLSPEC_FUC void init_output_format() override; | |||
MGE_WIN_DECLSPEC_FUC void init_output_mem_plan(bool dynamic) override final; | |||
MGE_WIN_DECLSPEC_FUC void scn_do_execute() override final; | |||
MGE_WIN_DECLSPEC_FUC void record_execute_deps(ExecDependencyArray& deps) override; | |||
protected: | |||
using HostIONodeBase::HostIONodeBase; | |||
@@ -77,20 +78,20 @@ MGB_DEFINE_CLS_WITH_SUPER(SharedDeviceTensorBase, DeviceTensorHolder) // { | |||
std::shared_ptr<DeviceTensorND> m_dev_data; | |||
bool m_const_value; | |||
const TensorShape& get_output_shape() override; | |||
MGE_WIN_DECLSPEC_FUC const TensorShape& get_output_shape() override; | |||
bool fill_in_static_infer(DeviceTensorND* dest) override { | |||
MGB_MARK_USED_VAR(dest); | |||
return false; | |||
} | |||
void init_output_comp_node() override; | |||
MGE_WIN_DECLSPEC_FUC void init_output_comp_node() override; | |||
public: | |||
//! const_value marks whether the device value of this operator should | |||
//! be treated as constant during graph execution. Should be false in | |||
//! most cases. | |||
SharedDeviceTensorBase( | |||
MGE_WIN_DECLSPEC_FUC SharedDeviceTensorBase( | |||
ComputingGraph& graph, const std::shared_ptr<DeviceTensorND>& dev_data, | |||
bool const_value, const OperatorNodeConfig& config); | |||
@@ -248,7 +249,8 @@ private: | |||
*/ | |||
MGB_DEFINE_OPR_CLASS_WITH_EXPORT( | |||
SharedDeviceTensor, intl::SharedDeviceTensorBase) // { | |||
cg::static_infer::SourceType static_infer_src_type() const override; | |||
MGE_WIN_DECLSPEC_FUC cg::static_infer::SourceType static_infer_src_type() | |||
const override; | |||
public: | |||
using Super::Super; | |||