@@ -15,6 +15,11 @@
#include "./test_common.h"
#include "megbrain/tensor.h"

#ifndef WIN32
#include <dirent.h>
#include <string.h>
#endif

#include <chrono>
#include <memory>
#include <random>
@@ -497,6 +502,115 @@ void test_input_no_copy(int record) {
        compare_lite_tensor<float>(output_tensor, outputs[i]);
    }
}
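
// Checks record-mode inference with zero-copy I/O on an .axe model: the model
// is first run through a plain Network to collect reference inputs/outputs,
// then replayed on a record-enabled Network whose I/O tensors alias
// user-provided memory, and the two sets of outputs are compared.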
void test_io_no_copy_ax(std::string model_name, int record = 1) {
    std::string model_path = model_name;
    std::vector<std::string> input_names, output_names;

    std::vector<std::vector<std::shared_ptr<Tensor>>> inputs;
    std::vector<std::vector<std::shared_ptr<Tensor>>> outputs;

    std::shared_ptr<Network> network = std::make_shared<Network>();
    network->load_model(model_path);

    input_names = network->get_all_input_name();
    output_names = network->get_all_output_name();

    // Prepare reference data: run the plain network three times, filling each
    // input with a known per-iteration constant and keeping copies of all
    // inputs and outputs.
    for (int i = 0; i < 3; i++) {
        std::vector<std::shared_ptr<Tensor>> net_inputs;
        std::vector<std::shared_ptr<Tensor>> net_outputs;

        for (size_t j = 0; j < input_names.size(); j++) {
            auto in_tensor = network->get_io_tensor(input_names[j]);
            auto in_layout = in_tensor->get_layout();
            auto tmp_in = std::make_shared<Tensor>(LiteDeviceType::LITE_CPU, in_layout);

            auto size = in_tensor->get_tensor_total_size_in_byte() /
                        in_layout.get_elem_size();
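            // Fill every element with the iteration-dependent constant
            // (i + 1), matching the input's data type.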
            if (in_layout.data_type == LiteDataType::LITE_INT16) {
                auto ptr = static_cast<short*>(tmp_in->get_memory_ptr());
                for (size_t id = 0; id < size; id++) {
                    ptr[id] = i + 1;
                }
            } else if (in_layout.data_type == LiteDataType::LITE_UINT8) {
                auto ptr = static_cast<uint8_t*>(tmp_in->get_memory_ptr());
                for (size_t id = 0; id < size; id++) {
                    ptr[id] = i + 1;
                }
            }
            net_inputs.push_back(tmp_in);
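            // This reference phase feeds the network by copying the staged
            // data into its own input tensor; the record phase below uses
            // reset() instead.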
            in_tensor->copy_from(*tmp_in);
        }

        inputs.push_back(net_inputs);
        network->forward();
        network->wait();
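
        // Snapshot this run's outputs as the reference results.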
        for (size_t j = 0; j < output_names.size(); j++) {
            auto out_tensor = network->get_io_tensor(output_names[j]);
            auto out_layout = out_tensor->get_layout();
            auto tmp_out =
                    std::make_shared<Tensor>(LiteDeviceType::LITE_CPU, out_layout);

            tmp_out->copy_from(*out_tensor);
            net_outputs.push_back(tmp_out);
        }
        outputs.push_back(net_outputs);
    }
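
    // Build the record-mode network: outputs must live in user-specified
    // memory, the comp-node sequence is recorded at the requested level, and
    // shapes are treated as constant.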
    Config config;
    config.options.force_output_use_user_specified_memory = true;
    config.options.comp_node_seq_record_level = record;
    config.options.const_shape = true;

    std::shared_ptr<Network> network_record = std::make_shared<Network>(config);
    network_record->load_model(model_path);
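
    // Replay the three recorded runs with zero-copy I/O: inputs alias the
    // saved buffers via reset(), outputs are redirected into fresh CPU
    // tensors, and the results are checked against the references.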
    for (int i = 0; i < 3; i++) {
        for (size_t j = 0; j < inputs[i].size(); j++) {
            auto input_tensor = network_record->get_io_tensor(input_names[j]);
            input_tensor->reset(
                    inputs[i][j]->get_memory_ptr(), inputs[i][j]->get_layout());
        }

        std::vector<std::shared_ptr<Tensor>> net_outputs;

        for (size_t j = 0; j < outputs[i].size(); j++) {
            auto output_tensor = network_record->get_io_tensor(output_names[j]);
            auto tmp_out = std::make_shared<Tensor>(
                    LiteDeviceType::LITE_CPU, output_tensor->get_layout());
            output_tensor->reset(
                    tmp_out->get_memory_ptr(), output_tensor->get_layout());
            net_outputs.push_back(tmp_out);
        }

        network_record->forward();
        network_record->wait();

        for (size_t j = 0; j < outputs[i].size(); j++) {
            auto output_tensor = network_record->get_io_tensor(output_names[j]);
            compare_lite_tensor<float>(output_tensor, outputs[i][j]);
        }
    }
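
    // Rough profiling: rebind the outputs once, then time ten forward passes
    // and report the average latency.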
    printf("profile the model %s run\n", model_path.c_str());
    std::vector<std::shared_ptr<Tensor>> net_outputs;
    for (size_t j = 0; j < outputs[0].size(); j++) {
        auto output_tensor = network_record->get_io_tensor(output_names[j]);
        auto tmp_out = std::make_shared<Tensor>(
                LiteDeviceType::LITE_CPU, output_tensor->get_layout());
        output_tensor->reset(tmp_out->get_memory_ptr(), output_tensor->get_layout());
        net_outputs.push_back(tmp_out);
    }
    lite::Timer timer("profile");
    for (int i = 0; i < 10; i++) {
        network_record->forward();
        network_record->wait();
    }
    auto sum_time = timer.get_used_time();
    printf("model %s used time average %f ms\n", model_path.c_str(), sum_time / 10);
}
} // namespace

TEST(TestNetWork, OutputNoCopy) {
@@ -515,6 +629,28 @@ TEST(TestNetWork, IONoCopyRecord) {
    test_input_no_copy(1);
}
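
// Scans ./ax_models for .axe model files (via the POSIX dirent API, so
// non-Windows only) and runs the record/no-copy check on each one found.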
TEST(TestNetWork, IONoCopyRecordAx) {
    std::vector<std::string> file_names;
#ifndef WIN32
    DIR* dirptr = NULL;
    struct dirent* dirp;
    std::string model_dir = "./ax_models";
    dirptr = opendir(model_dir.c_str());
    while (dirptr != NULL && (dirp = readdir(dirptr)) != NULL) {
        std::string file_name(dirp->d_name);
        if (file_name.find(".axe", 0) != std::string::npos) {
            file_names.push_back(model_dir + "/" + file_name);
        }
    }
    // only close the directory if it was actually opened
    if (dirptr != NULL) {
        closedir(dirptr);
    }
#endif

    for (const auto& file_name : file_names) {
        printf("test model: %s\n", file_name.c_str());
        test_io_no_copy_ax(file_name);
    }
}

TEST(TestNetWork, OutputDynamicAlloc) {
    Config config;
    config.options.force_output_dynamic_alloc = true;