@@ -143,6 +143,15 @@ if(CXX_SUPPORT_GOLD AND NOT ANDROID AND NOT APPLE AND NOT MSVC AND NOT WIN32)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
 endif()
 
+option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
+option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" ON)
+option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
+option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
+option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
+option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
+option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
+option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
+
 if(NOT MGE_WITH_JIT)
     if(MGE_WITH_HALIDE)
         message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
@@ -84,6 +84,7 @@ megcoreStatus_t megcoreGetDeviceFlags(
         unsigned int *flags);
 megcoreStatus_t megcoreActivate(megcoreDeviceHandle_t handle);
+megcoreStatus_t megcoreDeactivate(megcoreDeviceHandle_t handle);
 megcoreStatus_t megcoreMalloc(megcoreDeviceHandle_t handle,
         void **devPtr, size_t sizeInBytes);
 megcoreStatus_t megcoreFree(megcoreDeviceHandle_t handle,
@@ -86,6 +86,7 @@ if (BUILD_SHARED_LIBS)
 else()
     target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS})
 endif()
+
 if(CMAKE_THREAD_LIBS_INIT)
     target_link_libraries(megdnn PRIVATE Threads::Threads)
 endif()
@@ -38,6 +38,7 @@ class DeviceContext {
         virtual size_t mem_alignment_in_bytes() const noexcept = 0;
 
         virtual void activate() = 0;
+        virtual void deactivate() {}
         virtual void *malloc(size_t size_in_bytes) = 0;
         virtual void free(void *ptr) = 0;
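
The new `deactivate()` hook defaults to an empty body, so existing `DeviceContext` subclasses keep compiling unchanged; only backends that bind per-thread device state need to override it. A minimal sketch of an override, assuming only the virtuals visible in this hunk (the subclass name and all bodies are illustrative, not part of this change; a real context has more members):

```cpp
#include <cstddef>
#include <cstdlib>

// Hypothetical backend context, sketched against the interface above.
class ToyDeviceContext final : public DeviceContext {
public:
    size_t mem_alignment_in_bytes() const noexcept override { return 64; }
    void activate() override { /* bind the device to the calling thread */ }
    // The new hook: undo whatever activate() bound; the base default is a no-op.
    void deactivate() override { /* release the thread's device binding */ }
    void *malloc(size_t size_in_bytes) override { return std::malloc(size_in_bytes); }
    void free(void *ptr) override { std::free(ptr); }
};
```
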
@@ -74,6 +74,13 @@ megcoreStatus_t megcoreActivate(megcoreDeviceHandle_t handle)
     return megcoreSuccess;
 }
 
+megcoreStatus_t megcoreDeactivate(megcoreDeviceHandle_t handle)
+{
+    megdnn_assert(handle);
+    handle->content->deactivate();
+    return megcoreSuccess;
+}
+
 megcoreStatus_t megcoreMalloc(megcoreDeviceHandle_t handle,
         void **devPtr, size_t sizeInBytes)
 {
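
Together with the header hunk above, this gives the megcore C API a symmetric activate/deactivate pair that forwards to the underlying `DeviceContext`. A hedged usage sketch; `megcoreCreateDeviceHandle` and `megcoreDestroyDeviceHandle` are existing megcore calls, but treat their exact signatures here as assumptions:

```cpp
// Sketch: each megcoreActivate() can now be matched by a megcoreDeactivate()
// before the handle is destroyed. Error codes are ignored for brevity.
megcoreDeviceHandle_t handle;
megcoreCreateDeviceHandle(&handle, megcorePlatformCPU);  // signature assumed
megcoreActivate(handle);
void *dev_ptr = nullptr;
megcoreMalloc(handle, &dev_ptr, 256);
megcoreFree(handle, dev_ptr);
megcoreDeactivate(handle);  // new in this diff; calls DeviceContext::deactivate()
megcoreDestroyDeviceHandle(handle);
```
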
@@ -27,7 +27,6 @@ endif()
 add_executable(megdnn_test ${SOURCES})
 target_link_libraries(megdnn_test gtest)
-
 target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS})
@@ -246,6 +246,7 @@ SymbolVarArray _Opr::tensor_rt_runtime(const SymbolVarArray& inputs,
 }
 #endif
+
 SymbolVar _Opr::timestamp(SymbolVar input, PyObject* dest, size_t dest_off,
                           const OperatorNodeConfig& config) {
     auto tensor = std::make_shared<HostTensorND>(
@@ -118,6 +118,8 @@ static SymbolVarArray tensor_rt_runtime(const SymbolVarArray& inputs,
                                         PyObject* data_bytes,
                                         const OperatorNodeConfig& config);
 static SymbolVar timestamp(SymbolVar input, PyObject* dest, size_t dest_off,
                            const OperatorNodeConfig& config);
@@ -18,7 +18,6 @@
 #if MGB_ENABLE_OPR_MM
 #include "megbrain/opr/collective_comm.h"
 #endif
-
 using AxisIndexer = mgb::opr::indexing::AxisIndexer;
 
 /*!
@@ -88,7 +88,7 @@ if (MGB_WITH_FLATBUFFERS)
             ${CMAKE_CURRENT_BINARY_DIR}/serialization/impl/opr_param_defs.fbs
         COMMAND
             ${PYTHON_EXECUTABLE} ${GEN_FLATBUFFERS_SCHEMA_PY} ${OPR_PARAM_DEFS_PY} ${CMAKE_CURRENT_BINARY_DIR}/serialization/impl/opr_param_defs.fbs
-        DEPENDS ${GEN_FLATBUFFERS_SCHEMA_PY} ${OPR_PARAM_DEFS_PY}
+        DEPENDS ${GEN_FLATBUFFERS_SCHEMA_PY} ${OPR_PARAM_DEFS_PY}
         VERBATIM
     )
     add_custom_command(
@@ -124,7 +124,6 @@ if (MGB_WITH_FLATBUFFERS)
     target_include_directories(megbrain PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/serialization/include)
     target_compile_definitions(megbrain PUBLIC MGB_ENABLE_FBS_SERIALIZATION=1)
     target_link_libraries(megbrain PUBLIC flatbuffers)
-
     set (GENERATED_FLATBUFFERS_CONVERTER_PATH ${CMAKE_CURRENT_BINARY_DIR}/genfiles)
     set (GEN_FLATBUFFERS_CONVERTER_PY ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_flatbuffers_converter.py)
     file (MAKE_DIRECTORY ${GENERATED_FLATBUFFERS_CONVERTER_PATH})
@@ -96,7 +96,7 @@ megcore::AsyncErrorInfo* MegDNNHandle::make_async_error_info(
             cn.free_device(ptr);
         }
     };
-    megcore::AsyncErrorInfo zero_info{0, nullptr, "", {0,0,0,0}};
+    megcore::AsyncErrorInfo zero_info{0, nullptr, "", {0, 0, 0, 0}};
     auto ptr = static_cast<megcore::AsyncErrorInfo*>(
             env.comp_node().alloc_device(sizeof(zero_info)));
     cn.copy_to_device(ptr, &zero_info, sizeof(zero_info));
@@ -106,7 +106,7 @@ megcore::AsyncErrorInfo* MegDNNHandle::make_async_error_info(
 }
 #endif
 
-/* =================== misc =================== */
+/* =================== misc =================== */
 
 #if MGB_CUDA
@@ -123,9 +123,9 @@ StaticDeviceMemoryManager::make_default_impl() {
 }
 #endif // MGB_THREAD_SAFE
 
-/* ==================== CUDAAsyncVarReleaser ==================== */
-#if MGB_CUDA
-class VarNodeMemManager::CUDAAsyncVarReleaser {
+/* ==================== AsyncVarReleaser ==================== */
+#if MGB_CUDA
+class VarNodeMemManager::AsyncVarReleaser {
     struct WaiterParam {
         CompNode cn;
         CompNode::Event *event;
@@ -133,10 +133,10 @@ class VarNodeMemManager::CUDAAsyncVarReleaser {
     };
     class Waiter final: public AsyncQueueSC<WaiterParam, Waiter> {
-        CUDAAsyncVarReleaser *m_par_releaser;
+        AsyncVarReleaser *m_par_releaser;
 
     public:
-        Waiter(CUDAAsyncVarReleaser *releaser):
+        Waiter(AsyncVarReleaser *releaser):
             m_par_releaser(releaser)
         {
         }
@@ -159,7 +159,7 @@ class VarNodeMemManager::CUDAAsyncVarReleaser {
     Spinlock m_event_pool_lock;
 
 public:
-    ~CUDAAsyncVarReleaser() {
+    ~AsyncVarReleaser() {
         wait_release_finish();
     }
@@ -247,15 +247,16 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() {
 VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph):
     m_owner_graph(graph),
     m_seq_mem_opt(graph)
-#if MGB_CUDA
-    ,m_cuda_asyn_var_releaser(new CUDAAsyncVarReleaser)
+#if MGB_CUDA
+    ,m_asyn_var_releaser(new AsyncVarReleaser)
 #endif
 {
     auto on_comp_seq_finish = [this](const event::CompSeqExecFinished& ev) {
+        MGB_MARK_USED_VAR(ev);
         // async release is only used for sync between multiple comp nodes, and
         // does not wait for device to finish
-#if MGB_CUDA
-        m_cuda_asyn_var_releaser->wait_release_finish();
+#if MGB_CUDA
+        m_asyn_var_releaser->wait_release_finish();
 #endif
         m_cpu_async_release_barrier.wait_zero();
     };
@@ -295,9 +296,10 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph):
     graph->event().register_receiver_permanent<event::CompSeqExecError>(
             on_comp_seq_error);
 
-#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER
+#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && (MGB_CUDA \
+        )
     auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) {
-        m_cuda_asyn_var_releaser->wait_release_finish();
+        m_asyn_var_releaser->wait_release_finish();
     };
     graph->event().register_receiver_permanent<event::BeforeMemDefrag>(
             on_mem_defrag_start);
@@ -1341,7 +1343,7 @@ void VarNodeMemManager::decr_var_mem_refcnt(
         }
 #if MGB_CUDA
         case DT::CUDA:
-            m_cuda_asyn_var_releaser->add(dispatch_cn, var);
+            m_asyn_var_releaser->add(dispatch_cn, var);
             break;
 #endif
         default:
@@ -431,10 +431,10 @@ class VarNodeMemManager {
     SyncableCounter m_cpu_async_release_barrier;
 
-#if MGB_CUDA
-    //! release dynamic var on after cuda event finishes
-    class CUDAAsyncVarReleaser;
-    std::unique_ptr<CUDAAsyncVarReleaser> m_cuda_asyn_var_releaser;
+#if MGB_CUDA
+    //! release dynamic var after compnode event finishes
+    class AsyncVarReleaser;
+    std::unique_ptr<AsyncVarReleaser> m_asyn_var_releaser;
 #endif
 
     VarDevMemDefragmenter m_var_dev_mem_defragmenter{this};
@@ -41,9 +41,9 @@
         } \
     } while (0)
 
-#endif // MGB_ENABLE_LOGGING
-#endif
+#endif //MGB_ENABLE_LOGGING
+#endif //MGB_CUDA
 
 //! whether to enable asynchronous initialization for CompNode and CompNodeEnv
 #define MGB_ENABLE_COMP_NODE_ASYNC_INIT (MGB_CUDA)
@@ -136,7 +136,6 @@ public:
      * error message
      */
     static std::string get_cuda_extra_info();
-
     CudaError(const std::string& msg);
 };
@@ -59,9 +59,6 @@ TEST(TestCompNode, Parse) {
     ASSERT_THROW(L::parse("cpu0:"), MegBrainError);
     ASSERT_THROW(L::parse("cpu0:x"), MegBrainError);
     ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError);
-    ASSERT_THROW(L::parse("heaxgon0"), MegBrainError);
-    ASSERT_THROW(L::parse("rcom0"), MegBrainError);
-    ASSERT_THROW(L::parse("cmabricon0"), MegBrainError);
     ASSERT_THROW(L::parse("multithread"), MegBrainError);
     ASSERT_THROW(L::parse("multithread1:"), MegBrainError);
     ASSERT_THROW(L::parse("multithread1:default"), MegBrainError);
@@ -53,6 +53,7 @@
 #cmakedefine01 MEGDNN_THREADS_512
 #cmakedefine01 MEGDNN_ENABLE_MULTI_THREADS
+
 // whether cuda is available
 #ifndef MGB_CUDA
 #define MGB_CUDA 1
@@ -15,6 +15,7 @@ if (MGE_WITH_CUDA AND MGE_WITH_TRT)
     list(APPEND SOURCES ${SOURCES_})
 endif()
+
 add_executable(megbrain_test ${SOURCES})
 target_link_libraries(megbrain_test gtest)
 target_link_libraries(megbrain_test megengine)
@@ -98,6 +98,22 @@ dtype, RandomDistribution::UNIFORM>::operator ()(
     return ret;
 }
 
+template<typename dtype>
+std::shared_ptr<HostTensorND> HostTensorGenerator<
+        dtype, RandomDistribution::CONSTANT>::operator ()(
+        const TensorShape &shape, CompNode cn) {
+    if (!cn.valid())
+        cn = CompNode::load("xpu0");
+    std::shared_ptr<HostTensorND> ret =
+            std::make_shared<HostTensorND>(cn, shape, dtype());
+    auto ptr = ret->ptr<ctype>();
+    for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++ i) {
+        ptr[i] = m_default_val;
+    }
+    return ret;
+}
+
 // explicit instantialization of HostTensorGenerator
 namespace mgb {
 template class HostTensorGenerator<
@@ -105,15 +121,25 @@ namespace mgb {
 template class HostTensorGenerator<
         dtype::Float32, RandomDistribution::UNIFORM>;
 template class HostTensorGenerator<
+        dtype::Float32, RandomDistribution::CONSTANT>;
+template class HostTensorGenerator<
         dtype::Float16, RandomDistribution::GAUSSIAN>;
 template class HostTensorGenerator<
         dtype::Int8, RandomDistribution::UNIFORM>;
 template class HostTensorGenerator<
+        dtype::Int8, RandomDistribution::CONSTANT>;
+template class HostTensorGenerator<
         dtype::Uint8, RandomDistribution::UNIFORM>;
 template class HostTensorGenerator<
+        dtype::Uint8, RandomDistribution::CONSTANT>;
+template class HostTensorGenerator<
         dtype::Int16, RandomDistribution::UNIFORM>;
 template class HostTensorGenerator<
+        dtype::Int16, RandomDistribution::CONSTANT>;
+template class HostTensorGenerator<
         dtype::Int32, RandomDistribution::UNIFORM>;
+template class HostTensorGenerator<
+        dtype::Int32, RandomDistribution::CONSTANT>;
 std::shared_ptr<HostTensorND>
 HostTensorGenerator<dtype::QuantizedS8, RandomDistribution::UNIFORM>::
 operator()(const TensorShape& shape, CompNode cn) {
@@ -175,7 +175,7 @@ class RNGxorshf {
 };
 
 enum class RandomDistribution {
-    GAUSSIAN, UNIFORM
+    GAUSSIAN, UNIFORM, CONSTANT
 };
 
 template<class dtype>
template<class dtype> | template<class dtype> | ||||
@@ -322,6 +322,26 @@ class HostTensorGenerator<dtype, RandomDistribution::UNIFORM> final: | |||||
ctype m_lo, m_hi; | ctype m_lo, m_hi; | ||||
}; | }; | ||||
//! const value | |||||
template<class dtype> | |||||
class HostTensorGenerator<dtype, RandomDistribution::CONSTANT> final: | |||||
public HostTensorGeneratorBase { | |||||
public: | |||||
using ctype = typename DTypeTrait<dtype>::ctype; | |||||
HostTensorGenerator(ctype default_val) | |||||
: HostTensorGeneratorBase{next_rand_seed()}, | |||||
m_default_val{default_val} {} | |||||
std::shared_ptr<HostTensorND> operator ()( | |||||
const TensorShape &shape, CompNode cn = {}) override; | |||||
using HostTensorGeneratorBase::operator(); | |||||
private: | |||||
ctype m_default_val; | |||||
}; | |||||
template <> | template <> | ||||
class HostTensorGenerator<dtype::QuantizedS8, RandomDistribution::UNIFORM> final | class HostTensorGenerator<dtype::QuantizedS8, RandomDistribution::UNIFORM> final | ||||
: public HostTensorGeneratorBase { | : public HostTensorGeneratorBase { | ||||
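
Based on the declaration above and the `operator ()` definition added earlier, a test can now build constant-filled host tensors in one line. A usage sketch (shape and fill value are arbitrary):

```cpp
// Sketch: every element comes back equal to the constructor argument.
HostTensorGenerator<dtype::Float32, RandomDistribution::CONSTANT> gen{1.f};
auto host_x = gen({2, 3});  // empty CompNode falls back to xpu0
mgb_assert(host_x->ptr<float>()[0] == 1.f);
```
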
@@ -21,8 +21,8 @@ pdef('PersistentOutputStorage').add_fields(
 (pdef('ExecutionPolicy', 'specify how to select an algorithm for an operator').
  add_enum('Strategy',
           Doc('HEURISTIC', 'use heuristic to choose the fastest algorithm'),
-          Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, '
-              'and the chosen algorithm is reproducible'),
+          Doc('HEURISTIC_REPRODUCIBLE', 'use heuristic to choose the fastest algorithm, '
+              'and the chosen algorithm is reproducible'),
           Doc('PROFILE',
               'run possible algorithms on real device to find the best'),
           Doc('PROFILE_REPRODUCIBLE',