
subgraph_detail.cpp 7.2 kB

/**
 * \file imperative/src/impl/subgraph_detail.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "megbrain/imperative/subgraph_detail.h"
#include "megbrain/imperative/graph_builder.h"
#include "megbrain/opr/io.h"
#include "megbrain/imperative/ops/autogen.h"
#include "./op_trait.h"

namespace mgb {
namespace imperative {
namespace subgraph_detail {
// Expand the op's forward subgraph into the owning computing graph: each inner
// op is applied via OpDef::apply_on_var_node, constants become ImmutableTensor nodes.
VarNodeArray apply_on_var_node(
        const OpDef& def,
        const VarNodeArray& inputs) {
    SmallVector<LogicalTensorDesc> input_descs;
    for (auto&& input : inputs) {
        input_descs.push_back({TensorLayout{input->dtype()}, input->comp_node()});
    }
    auto apply_functor = [](const std::shared_ptr<OpDef>& op,
                            const VarNodeArray& inputs, size_t nr_outputs) {
        return OpDef::apply_on_var_node(*op, inputs);
    };
    auto const_functor = [&](const TensorPtr& value) {
        return opr::ImmutableTensor::make(
                       *inputs[0]->owner_graph(), value->get_value())
                .node();
    };
    auto subgraph = def.trait()->make_forward_graph(def, input_descs);
    auto outputs = subgraph.apply(inputs, apply_functor, const_functor);
    return outputs;
}
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
        const OpDef& def,
        const SmallVector<LogicalTensorDesc>& inputs) {
    auto subgraph = def.trait()->make_forward_graph(def, inputs);
    bool all_validated = true;
    auto apply_functor = [&](const std::shared_ptr<OpDef>& op,
                             const SmallVector<LogicalTensorDesc>& inputs,
                             size_t nr_outputs) {
        auto [outputs, validated] = OpDef::infer_output_attrs_fallible(*op, inputs);
        all_validated = all_validated && validated;
        return outputs;
    };
    auto const_functor = [&](const TensorPtr& value) {
        return LogicalTensorDesc{value->layout(), value->comp_node(),
                                 value->get_value().proxy_to_default_cpu()};
    };
    auto outputs = subgraph.apply(inputs, apply_functor, const_functor);
    return {outputs, all_validated};
}
SmallVector<TensorPtr> apply_on_physical_tensor(
        const OpDef& def,
        SmallVector<TensorPtr> inputs) {
    SmallVector<LogicalTensorDesc> input_descs;
    for (auto&& input : inputs) {
        input_descs.push_back({input->layout(), input->comp_node()});
    }
    auto subgraph = def.trait()->make_forward_graph(def, input_descs);
    auto apply_functor = [](const std::shared_ptr<OpDef>& op,
                            const SmallVector<TensorPtr>& inputs, size_t nr_outputs) {
        return OpDef::apply_on_physical_tensor(*op, inputs);
    };
    auto const_functor = [&](const TensorPtr& value) {
        return value;
    };
    auto outputs = subgraph.apply(inputs, apply_functor, const_functor);
    return outputs;
}
// Build a backward graph for an op defined by a forward subgraph: replay the
// forward exprs through a builder, then run reverse-mode accumulation over the
// recorded exprs, splicing in each inner op's own backward graph.
static EncodedSubraph make_backward_graph_from_forward(
        const SmallVector<LogicalTensorDesc>& inputs,
        const SmallVector<bool>& input_requires_grad,
        const SmallVector<bool>& output_has_grad,
        EncodedSubraph forward_graph) {
    using namespace std::placeholders;
    using var_t = Subgraph::var_t;
    using vars_t = Subgraph::vars_t;
    // The builder infers output descs for every expr it records.
    Subgraph::Builder<LogicalTensorDesc> builder(
            [](auto&& op, auto&& input_descs, size_t nr_outputs) {
                auto [descs, _] = OpDef::infer_output_attrs_fallible(*op, input_descs);
                return descs;
            });
    // Gradients contributed to the same var are accumulated with elementwise ADD.
    auto accum_grad = [&](var_t lhs, var_t rhs) {
        return builder.write_expr(Elemwise::make(Elemwise::Mode::ADD), {lhs, rhs}, 1)[0];
    };
    GradContext<var_t> grad_context{accum_grad};
    auto input_vars = builder.write_inputs(inputs);
    auto outputs = forward_graph.apply(
            input_vars,
            std::bind(&decltype(builder)::write_expr, &builder, _1, _2, _3),
            [&](TensorPtr constant) {
                return builder.write_constant(
                        constant, {constant->layout(), constant->comp_node()});
            });
    size_t nr_outputs = outputs.size();
    auto apply_mask = [](auto&& values, SmallVector<bool> mask) {
        mgb_assert(mask.size() == values.size(), "");
        std::decay_t<decltype(values)> results;
        for (size_t i = 0; i < mask.size(); ++i) {
            if (mask[i]) {
                results.push_back(values[i]);
            }
        }
        return results;
    };
    grad_context.mark_require_grads(apply_mask(input_vars, input_requires_grad));
    builder.iterate([&](std::list<Subgraph::expr_t>::iterator iter) {
        grad_context.record_expr(iter->op, iter->inputs, iter->outputs);
    });
    // The backward graph takes the forward outputs and their grads as extra inputs.
    auto output_descs = builder.get_descs(outputs);
    auto computed_outputs = builder.write_inputs(output_descs);
    auto output_grads = builder.write_inputs(output_descs);
    grad_context.backward(
            apply_mask(outputs, output_has_grad),
            apply_mask(output_grads, output_has_grad),
            [&](Subgraph::expr_t expr, vars_t output_grads) {
                // Each inner op's backward graph consumes (inputs, outputs, output grads).
                auto bg = OpDef::make_backward_graph(
                        *expr.op, builder.get_descs(expr.inputs),
                        grad_context.get_require_grads(expr.inputs),
                        grad_context.get_has_grads(expr.outputs));
                if (bg.graph.empty()) {
                    return vars_t(expr.inputs.size(), 0);
                }
                vars_t grad_inputs;
                grad_inputs.insert(grad_inputs.end(), expr.inputs.begin(),
                                   expr.inputs.end());
                grad_inputs.insert(grad_inputs.end(), expr.outputs.begin(),
                                   expr.outputs.end());
                grad_inputs.insert(grad_inputs.end(), output_grads.begin(),
                                   output_grads.end());
                auto apply_functor = std::bind(&decltype(builder)::write_expr,
                                               &builder, _1, _2, _3);
                auto const_functor = [&](TensorPtr constant) {
                    return builder.write_constant(
                            constant, {constant->layout(), constant->comp_node()});
                };
                return bg.apply(grad_inputs, apply_functor, const_functor);
            });
    builder.add_outputs(grad_context.get_grads(input_vars));
    // Replace the recomputed forward outputs with the externally provided ones,
    // so the encoded backward graph reuses saved outputs instead of recomputing.
    for (size_t i = 0; i < nr_outputs; ++i) {
        builder.replace_var(outputs[i], computed_outputs[i]);
    }
    auto backward_graph = builder.encode();
    return backward_graph;
}
EncodedSubraph make_backward_graph(
        const OpDef& def,
        const SmallVector<LogicalTensorDesc>& inputs,
        const SmallVector<bool>& input_requires_grad,
        const SmallVector<bool>& output_has_grad) {
    auto forward_graph = OpDef::make_forward_graph(def, inputs);
    return make_backward_graph_from_forward(
            inputs, input_requires_grad, output_has_grad, forward_graph);
}
std::tuple<SmallVector<MemoryDesc>, SmallVector<MemoryDesc>> infer_output_mem_desc(
        const OpDef& def,
        const SmallVector<TensorPtr>& inputs_tensors,
        const SmallVector<MemoryDesc>& inputs_mems) {
    // No memory descriptors are inferred for subgraph-based ops; return empty lists.
    return {{}, {}};
}

}  // namespace subgraph_detail
}  // namespace imperative
}  // namespace mgb

The MegEngine installation package bundles the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build to choose between. To run GPU programs, make sure the machine has GPU hardware and that the driver is installed. If you would like to try deep learning development on a cloud GPU computing platform, you are welcome to visit the MegStudio platform.
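As a quick post-install sanity check, a minimal Python sketch along these lines can report whether MegEngine sees a usable GPU. It assumes the `megengine` package is installed; `is_cuda_available` and `megengine.device.get_device_count` are the helper names exposed by recent MegEngine releases and may differ in older versions.

import megengine as mge
import megengine.device as device

# Report whether a CUDA-capable GPU is visible; otherwise fall back to CPU.
if mge.is_cuda_available():
    print("CUDA devices visible to MegEngine:", device.get_device_count("gpu"))
else:
    print("No usable GPU detected; MegEngine will run on CPU.")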