You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

extra_impl_helper.h 2.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. /**
  2. * \file test/common/extra_impl_helper.h
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #pragma once
  12. #include "megdnn/basic_types.h"
  13. #include "megdnn/handle.h"
  14. #include "megdnn/oprs/general.h"
  15. #include "test/common/opr_proxy.h"
  16. namespace megdnn {
  17. namespace test {
  18. template <typename Opr, int NR_OUTPUTS = 1, typename Proxy = OprProxy<Opr>>
  19. std::function<void(const TensorNDArray&)> extra_impl_helper(
  20. Handle* h, const typename Opr::Param& p) {
  21. auto impl = [](const TensorNDArray& tensors, Handle* h,
  22. const typename Opr::Param& p) {
  23. static_assert(
  24. NR_OUTPUTS <= OprTrait<Opr>::arity,
  25. "OutNumber should less than or equal to arity.");
  26. Proxy proxy;
  27. auto fp32_opr = h->create_operator<Opr>();
  28. auto type_cvt = h->create_operator<TypeCvt>();
  29. fp32_opr->param() = p;
  30. TensorNDArray fp32_tensors;
  31. for (size_t i = 0; i < tensors.size(); ++i) {
  32. auto layout = tensors[i].layout;
  33. layout.dtype = dtype::Float32();
  34. fp32_tensors.emplace_back(malloc(layout.span().dist_byte()), layout);
  35. type_cvt->exec(tensors[i], fp32_tensors[i]);
  36. }
  37. proxy.exec(fp32_opr.get(), fp32_tensors);
  38. for (size_t i = fp32_tensors.size() - NR_OUTPUTS; i < fp32_tensors.size();
  39. ++i) {
  40. type_cvt->exec(fp32_tensors[i], tensors[i]);
  41. }
  42. for (size_t i = 0; i < tensors.size(); ++i) {
  43. free(fp32_tensors[i].raw_ptr());
  44. }
  45. };
  46. return std::bind(impl, std::placeholders::_1, h, std::cref(p));
  47. }
  48. template <>
  49. std::function<void(const TensorNDArray&)> extra_impl_helper<AddUpdate>(
  50. Handle* h, const AddUpdate::Param& p);
  51. } // namespace test
  52. } // namespace megdnn
  53. // vim: syntax=cpp.doxygen

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台