/**
 * \file dnn/test/armv7/convolution.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "test/armv7/fixture.h"

#include "test/common/convolution.h"
#include "test/common/checker.h"
#include "test/common/benchmarker.h"
#include "test/common/rng.h"

using namespace megdnn;
using namespace test;

#if MEGDNN_WITH_BENCHMARK
/**
 * Benchmark of stride-2 float32 convolutions.
 *
 * Sweeps kernel size, input channels, output channels and square spatial
 * size, and for every combination prints the configuration followed by a
 * throughput figure derived from the measured time.
 */
TEST_F(ARMV7, BENCHMARK_CONVOLUTION_STRIDE2) {
    using Param = param::Convolution;

    // Times one configuration and prints its throughput.
    //   shapes: {src, filter, dst}; only src and filter are read here.
    //   param : convolution parameters (stride / padding) for this run.
    auto run = [&](const TensorShapeArray& shapes, Param param) {
        // NOTE(review): the template arguments on Benchmarker and
        // create_operator were missing in this file; <Convolution> is
        // inferred from `Param = param::Convolution` -- please confirm.
        Benchmarker<Convolution> benchmarker_float(handle());
        const size_t RUN = 100;
        auto tfloat = benchmarker_float.set_display(false)
                              .set_times(RUN)
                              .set_param(param)
                              .exec(shapes);

        // Filter shape is indexed as {OC, IC, FH, FW}.
        const size_t IC = shapes[1][1];
        const size_t FH = shapes[1][2];
        const size_t FW = shapes[1][3];

        // Deduce the output layout to learn how many output elements are
        // produced for this configuration.
        TensorLayout dst_layout;
        auto opr = handle()->create_operator<Convolution>();
        opr->param() = param;
        opr->deduce_layout(
                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()},
                dst_layout);

        // Two operations (multiply + add) per filter tap per output element.
        // Accumulate in double: size_t is only 32 bits wide on 32-bit ARMv7
        // targets, so an integer product could wrap for larger shapes.
        const double flop_count = 2.0 * IC * dst_layout.total_nr_elems() * FH * FW;

        // presumably exec() returns the total time in milliseconds for RUN
        // executions, which makes the quotient below MFLOPS -- TODO confirm.
        printf("flops: %.3f mflops\n", flop_count / (tfloat / RUN * 1000));
    };

    // Builds the parameters and shapes for one configuration, prints them
    // and benchmarks it. Padding is kernel / 2 in both dimensions.
    auto profile = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                       size_t stride) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = kernel / 2;
        param.pad_w = kernel / 2;
        // %zu is the conversion that matches size_t (%zd expects the signed
        // counterpart).
        printf("oc: %zu ic: %zu w: %zu h: %zu stride: %zu kernel_size: %zu\n",
               oc, ic, w, h, stride, kernel);

        // The destination shape is passed empty.
        run({{1, ic, h, w}, {oc, ic, kernel, kernel}, {}}, param);
    };

    for (size_t kernel : {2, 3, 5, 7}) {
        for (size_t ic : {3, 6, 12, 24}) {
            for (size_t oc : {3, 6, 12, 24}) {
                for (size_t size : {4, 7, 8, 14, 16, 17, 28, 32, 34, 64, 112}) {
                    profile(oc, ic, size, size, kernel, 2);
                }
            }
        }
    }
}
#endif

TEST_F(ARMV7, BENCHMARK_CONVOLUTION_1X1) { int exec_times = 50; Benchmarker 
benchmarker_gemm(handle()); benchmarker_gemm.set_times(exec_times); Benchmarker benchmarker(handle()); benchmarker.set_times(exec_times); float mod = 1000 * exec_times / 1e9; auto run = [&](size_t IC, size_t OC, size_t H, size_t W) { float time = 1.f, perf = 1.f; std::cout< benchmarker_gconv1x1(handle()); benchmarker_gconv1x1.set_times(exec_times); float mod = 1000 * exec_times / 1e9; auto run = [&](size_t IC, size_t OC, size_t H, size_t W, size_t group){ float time = 1.f, perf = 1.f; std::cout<