You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cond_take.cpp 2.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. /**
  2. * \file imperative/src/impl/ops/cond_take.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/imperative/ops/autogen.h"
  12. #include "megbrain/opr/misc.h"
  13. #include "../dnn_op_helper.h"
  14. #include "../op_trait.h"
  15. using namespace megdnn;
  16. namespace mgb::imperative {
  17. namespace {
  18. cg::OperatorNodeBase* apply_on_var_node(
  19. const OpDef& def,
  20. const VarNodeArray& inputs) {
  21. def.cast_final_safe<CondTake>();
  22. auto&& graph = inputs[0]->owner_graph();
  23. opr::CondTake::Param param;
  24. param.val = 1;
  25. cg::OperatorNodeConfig config;
  26. cg::OperatorNodeBase* opr = graph->insert_opr(
  27. std::make_unique<opr::CondTake>(
  28. inputs[0], inputs[1], param, config));
  29. return opr;
  30. }
  31. SmallVector<TensorPtr> apply_on_physical_tensor(
  32. const OpDef& def,
  33. const SmallVector<TensorPtr>& inputs) {
  34. auto&& opr = def.cast_final_safe<CondTake>();
  35. mgb_assert(opr.same_type<CondTake>());
  36. mgb_assert(inputs.size() == 2, "CondTake take 2 inputs, got %lu",
  37. inputs.size());
  38. auto&& inp = inputs[0];
  39. auto&& msk = inputs[1];
  40. mgb_assert(inp->layout().eq_shape(msk->layout()),
  41. "input shape does not match mask shape");
  42. mgb_assert(msk->get_value().dtype().enumv() == DTypeEnum::Bool,
  43. "mask dtype must be bool");
  44. DnnOprCaller<megdnn::CondTake> dnn_op(inp->comp_node());
  45. dnn_op.op->param().val = 1;
  46. TensorLayout m_layout({dnn_op.op->get_workspace_in_bytes(inp->layout())},
  47. dtype::Byte());
  48. auto dnn_workspace = dnn_op.create_workspace(m_layout);
  49. MegDNNDynOutMallocImpl<2> policy{inp->comp_node()};
  50. dnn_op.op->exec(inp->dev_tensor().as_megdnn(),
  51. msk->dev_tensor().as_megdnn(),
  52. dnn_workspace,
  53. &policy);
  54. SmallVector<TensorPtr> out;
  55. out.push_back(policy.at(0));
  56. out.push_back(policy.at(1));
  57. return out;
  58. }
  59. std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
  60. const OpDef& def,
  61. const SmallVector<LogicalTensorDesc>& inputs) {
  62. auto cn = inputs[0].comp_node;
  63. return {{
  64. {TensorLayout(inputs[0].layout.dtype), cn},
  65. {TensorLayout(dtype::Int32()), cn}
  66. }, true};
  67. }
  68. OP_TRAIT_REG(CondTake, CondTake, opr::CondTake)
  69. .apply_on_var_node(apply_on_var_node)
  70. .apply_on_physical_tensor(apply_on_physical_tensor)
  71. .infer_output_attrs_fallible(infer_output_attrs_fallible)
  72. .fallback();
  73. } // namespace
  74. } // namespace mgb::imperative

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台