diff --git a/src/core/impl/graph/seq_comp_node_opt_impl.cpp b/src/core/impl/graph/seq_comp_node_opt_impl.cpp
index 9b655a85..e135ab13 100644
--- a/src/core/impl/graph/seq_comp_node_opt_impl.cpp
+++ b/src/core/impl/graph/seq_comp_node_opt_impl.cpp
@@ -109,11 +109,8 @@ void SeqCompNodeOptimizerImpl::change_to_specific_stream(
             type = any_strong_changed ? StreamPropType::STRONG : StreamPropType::WEAK;
             int copy_stream = CompNode::Stream::COPY;
-            int nccl_stream = CompNode::Stream::NCCL;
             if (inp_streams.count(copy_stream))
                 stream = copy_stream;
-            else if (inp_streams.count(nccl_stream))
-                stream = nccl_stream;
             mgb_assert(type != StreamPropType::NONE && stream != 0);
         }
         return prop_type_storage.second = StreamPropType{stream, type};
@@ -188,8 +185,7 @@ void SeqCompNodeOptimizerImpl::register_stream_var(
     mgb_assert(var->owner_graph() == m_owner_graph &&
             (prop_type == StreamPropType::WEAK ||
              prop_type == StreamPropType::STRONG));
-    mgb_assert(stream == CompNode::Stream::COPY || stream ==
-            CompNode::Stream::NCCL);
+    mgb_assert(stream == CompNode::Stream::COPY);
 
     auto ins = m_var2prop_type.insert({var, {stream, prop_type}});
     if (!ins.second) {
diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h
index 9cf376a9..1c082977 100644
--- a/src/core/include/megbrain/comp_node.h
+++ b/src/core/include/megbrain/comp_node.h
@@ -207,8 +207,7 @@ class CompNode {
         static constexpr int
             COPY = -1,
             REMOTE_SEND = -2,
-            LOOP_SWAP = -3,
-            NCCL = -4;
+            LOOP_SWAP = -3;
     };
 
     CompNode() = default;
diff --git a/src/opr-mm/impl/collective_comm.cpp b/src/opr-mm/impl/collective_comm.cpp
index 46e43872..e3bc3f13 100644
--- a/src/opr-mm/impl/collective_comm.cpp
+++ b/src/opr-mm/impl/collective_comm.cpp
@@ -630,11 +630,7 @@ void CollectiveComm::get_output_var_shape(const TensorShapeArray& inp_shape,
             inp_shape, out_shape);
 }
 
-void CollectiveComm::init_output_comp_node() {
-    mgb_assert(output().size() == 1, "exactly one output expected, got %zu", output().size());
-    owner_graph()->seq_comp_node_optimizer().register_stream_var(output()[0],
-        {CompNode::Stream::NCCL, cg::SeqCompNodeOptimizer::StreamPropType::WEAK});
-}
+void CollectiveComm::init_output_comp_node() {}
 
 void CollectiveComm::init_output_mem_plan(bool dynamic) {
     for (size_t i = 0; i < output().size(); i++) {
diff --git a/src/opr/test/basic_arith/others.cpp b/src/opr/test/basic_arith/others.cpp
index a3a617af..f599da7d 100644
--- a/src/opr/test/basic_arith/others.cpp
+++ b/src/opr/test/basic_arith/others.cpp
@@ -269,13 +269,13 @@ TEST(TestOprBasicArith, AddUpdateOtherStream) {
     };
     std::shared_ptr<HostTensorND> host_val = gen({SIZE});
 
-    auto cn_nccl = CompNode::load("gpu0").change_stream(CompNode::Stream::NCCL);
+    auto cn1 = CompNode::load("gpu0:0").change_stream(1);
     auto param = opr::SharedDeviceTensor::make(*graph, *host_val);
     param.node()->owner_opr()->node_prop().attribute().priority =
             std::numeric_limits<int>::max();
-    auto copy = opr::Copy::make(param, cn_nccl);
+    auto copy = opr::Copy::make(param, cn1);
     auto add = (copy + 3) * 5;
-    auto add_update = opr::AddUpdate::make(param, add, {}, {cn_nccl});
+    auto add_update = opr::AddUpdate::make(param, add, {}, {cn1});
     auto callback = opr::CallbackInjector::make(add_update, set_flag);
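
Note (not part of the patch): with the dedicated NCCL stream constant removed, SeqCompNodeOptimizer::register_stream_var only accepts the copy stream, per the tightened assert above. A minimal sketch of the remaining valid call, assuming placeholder names graph (the owner ComputingGraph*) and var (an output VarNode*):

    graph->seq_comp_node_optimizer().register_stream_var(
            var, {CompNode::Stream::COPY,
                  cg::SeqCompNodeOptimizer::StreamPropType::WEAK});

Operators that previously asked for Stream::NCCL, such as CollectiveComm above, now simply keep the comp node they were assigned; code that needs a second stream picks one explicitly, as the updated test does with CompNode::load("gpu0:0").change_stream(1).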