@@ -40,6 +40,10 @@ TEST(TestCompNode, Parse) { | |||||
ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); | ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); | ||||
ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); | ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); | ||||
ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0)); | |||||
ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0)); | |||||
ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3)); | |||||
ASSERT_EQ(L::parse("cambriconx"), make_lc(D::CAMBRICON, -1, 0)); | ASSERT_EQ(L::parse("cambriconx"), make_lc(D::CAMBRICON, -1, 0)); | ||||
ASSERT_EQ(L::parse("cambricon2"), make_lc(D::CAMBRICON, 2, 0)); | ASSERT_EQ(L::parse("cambricon2"), make_lc(D::CAMBRICON, 2, 0)); | ||||
ASSERT_EQ(L::parse("cambricon2:3"), make_lc(D::CAMBRICON, 2, 3)); | ASSERT_EQ(L::parse("cambricon2:3"), make_lc(D::CAMBRICON, 2, 3)); | ||||
@@ -66,6 +70,7 @@ TEST(TestCompNode, Parse) { | |||||
ASSERT_THROW(L::parse("cpu0:"), MegBrainError); | ASSERT_THROW(L::parse("cpu0:"), MegBrainError); | ||||
ASSERT_THROW(L::parse("cpu0:x"), MegBrainError); | ASSERT_THROW(L::parse("cpu0:x"), MegBrainError); | ||||
ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError); | ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError); | ||||
ASSERT_THROW(L::parse("rcom0"), MegBrainError); | |||||
ASSERT_THROW(L::parse("cmabricon0"), MegBrainError); | ASSERT_THROW(L::parse("cmabricon0"), MegBrainError); | ||||
ASSERT_THROW(L::parse("atlast0"), MegBrainError); | ASSERT_THROW(L::parse("atlast0"), MegBrainError); | ||||
ASSERT_THROW(L::parse("multithread"), MegBrainError); | ASSERT_THROW(L::parse("multithread"), MegBrainError); | ||||
@@ -296,6 +301,18 @@ TEST(TestCompNodeCuda, set_prealloc_config) { | |||||
4, CompNode::DeviceType::CUDA); | 4, CompNode::DeviceType::CUDA); | ||||
} | } | ||||
#if MGB_ROCM | |||||
TEST(TestCompNodeROCm, MemNode) { | |||||
REQUIRE_AMD_GPU(2); | |||||
auto cn00 = CompNode::load("rocm0"), | |||||
cn1 = CompNode::load("rocm1"), | |||||
cn01 = CompNode::load("rocm0:1"); | |||||
ASSERT_EQ(cn00, CompNode::load("rocm0")); | |||||
ASSERT_EQ(cn00.mem_node(), cn01.mem_node()); | |||||
ASSERT_NE(cn00.mem_node(), cn1.mem_node()); | |||||
} | |||||
#endif | |||||
#if MGB_CAMBRICON | #if MGB_CAMBRICON | ||||
TEST(TestCompNodeCambricon, MemNode) { | TEST(TestCompNodeCambricon, MemNode) { | ||||
@@ -466,6 +483,10 @@ TEST(TestCompNodeCPU, PeerCopyFromCUDA) { | |||||
test_peer_copy_from_device("gpux"); | test_peer_copy_from_device("gpux"); | ||||
} | } | ||||
TEST(TestCompNodeCPU, PeerCopyFromROCm) { | |||||
REQUIRE_AMD_GPU(1); | |||||
test_peer_copy_from_device("rocmx"); | |||||
} | |||||
#if MGB_CAMBRICON | #if MGB_CAMBRICON | ||||
TEST(TestCompNodeCPU, PeerCopyFromCambricon) { | TEST(TestCompNodeCPU, PeerCopyFromCambricon) { | ||||
@@ -14,6 +14,10 @@ | |||||
#include "../internal/invoke.h" | #include "../internal/invoke.h" | ||||
#if MGB_ROCM | |||||
#include "hcc_detail/hcc_defs_prologue.h" | |||||
#include "megcore_rocm.h" | |||||
#endif | |||||
//! TODO: here has to be know some megdnn::opr when there is produced midout.h | //! TODO: here has to be know some megdnn::opr when there is produced midout.h | ||||
//! fix it if there is another graceful way. | //! fix it if there is another graceful way. | ||||
@@ -58,6 +62,11 @@ template <typename Opr> | |||||
typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl( | typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl( | ||||
const TParam& raw_param) { | const TParam& raw_param) { | ||||
MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_impl"))) | MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_impl"))) | ||||
#if MGB_ROCM | |||||
bool miopen_algo_search_enabled; | |||||
megcore::getMIOpenAlgoSearchStatus(&miopen_algo_search_enabled); | |||||
mgb_assert(miopen_algo_search_enabled, "MIOpen algo search not enabled"); | |||||
#endif | |||||
auto&& param = raw_param.as_single_pod<Param>(); | auto&& param = raw_param.as_single_pod<Param>(); | ||||
CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | ||||
auto megdnn_opr = intl::create_megdnn_opr<Opr>(cn); | auto megdnn_opr = intl::create_megdnn_opr<Opr>(cn); | ||||
@@ -234,6 +243,9 @@ Maybe<typename TimedProfiler<Opr>::Result> TimedProfiler<Opr>::profile( | |||||
template <typename Opr> | template <typename Opr> | ||||
void TimedProfiler<Opr>::prof_init_device(const TParam& raw_param) { | void TimedProfiler<Opr>::prof_init_device(const TParam& raw_param) { | ||||
MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_init_device"))) | MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_init_device"))) | ||||
#if MGB_ROCM | |||||
megcore::enableMIOpenAlgoSearch(true); | |||||
#endif | |||||
auto&& param = raw_param.as_single_pod<Param>(); | auto&& param = raw_param.as_single_pod<Param>(); | ||||
CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | ||||
// wait for cuda init, so its time does not get accounted in timeout | // wait for cuda init, so its time does not get accounted in timeout | ||||
@@ -327,6 +327,13 @@ bool mgb::check_gpu_available(size_t num) { | |||||
return true; | return true; | ||||
} | } | ||||
bool mgb::check_amd_gpu_available(size_t num) { | |||||
if (CompNode::get_device_count(CompNode::DeviceType::ROCM) < num) { | |||||
mgb_log_warn("skip test case that requires %zu AMD GPU(s)", num); | |||||
return false; | |||||
} | |||||
return true; | |||||
} | |||||
bool mgb::check_cambricon_device_available(size_t num) { | bool mgb::check_cambricon_device_available(size_t num) { | ||||
if (CompNode::get_device_count(CompNode::DeviceType::CAMBRICON) < num) { | if (CompNode::get_device_count(CompNode::DeviceType::CAMBRICON) < num) { | ||||
@@ -460,6 +460,8 @@ std::vector<CompNode> load_multiple_xpus(size_t num); | |||||
//! check whether given number of GPUs is available | //! check whether given number of GPUs is available | ||||
bool check_gpu_available(size_t num); | bool check_gpu_available(size_t num); | ||||
//! check whether given number of AMD GPUs is available | |||||
bool check_amd_gpu_available(size_t num); | |||||
//! check whether given number of cambricon devices is available | //! check whether given number of cambricon devices is available | ||||
bool check_cambricon_device_available(size_t num); | bool check_cambricon_device_available(size_t num); | ||||