You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

user_allocator.cpp 3.0 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. /**
  2. * \file example/cpp_example/user_allocator.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "example.h"
  12. #if LITE_BUILD_WITH_MGE
  13. using namespace lite;
  14. using namespace example;
  15. namespace {
  16. class CheckAllocator : public lite::Allocator {
  17. public:
  18. //! allocate memory of size in the given device with the given align
  19. void* allocate(LiteDeviceType, int, size_t size, size_t align) override {
  20. #ifdef WIN32
  21. return _aligned_malloc(size, align);
  22. #elif defined(__ANDROID__) || defined(ANDROID)
  23. return memalign(align, size);
  24. #else
  25. void* ptr = nullptr;
  26. auto err = posix_memalign(&ptr, align, size);
  27. if (!err) {
  28. printf("failed to malloc %zu bytes with align %zu", size, align);
  29. }
  30. return ptr;
  31. #endif
  32. };
  33. //! free the memory pointed by ptr in the given device
  34. void free(LiteDeviceType, int, void* ptr) override {
  35. #ifdef WIN32
  36. _aligned_free(ptr);
  37. #else
  38. ::free(ptr);
  39. #endif
  40. };
  41. };
  42. bool config_user_allocator(const Args& args) {
  43. std::string network_path = args.model_path;
  44. std::string input_path = args.input_path;
  45. auto allocator = std::make_shared<CheckAllocator>();
  46. //! create and load the network
  47. std::shared_ptr<Network> network = std::make_shared<Network>();
  48. Runtime::set_memory_allocator(network, allocator);
  49. network->load_model(network_path);
  50. //! set input data to input tensor
  51. std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
  52. //! copy or forward data to network
  53. size_t length = input_tensor->get_tensor_total_size_in_byte();
  54. void* dst_ptr = input_tensor->get_memory_ptr();
  55. auto src_tensor = parse_npy(input_path);
  56. void* src = src_tensor->get_memory_ptr();
  57. memcpy(dst_ptr, src, length);
  58. //! forward
  59. network->forward();
  60. network->wait();
  61. //! get the output data or read tensor set in network_in
  62. std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
  63. void* out_data = output_tensor->get_memory_ptr();
  64. size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
  65. output_tensor->get_layout().get_elem_size();
  66. printf("length=%zu\n", length);
  67. float max = -1.0f;
  68. float sum = 0.0f;
  69. for (size_t i = 0; i < out_length; i++) {
  70. float data = static_cast<float*>(out_data)[i];
  71. sum += data;
  72. if (max < data)
  73. max = data;
  74. }
  75. printf("max=%e, sum=%e\n", max, sum);
  76. return true;
  77. }
  78. } // namespace
  79. REGIST_EXAMPLE("config_user_allocator", config_user_allocator);
  80. #endif
  81. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}