|
|
@@ -709,6 +709,41 @@ void run_test_st(Args &env) { |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
auto run_iters = [&](uint32_t case_idx) -> float { |
|
|
|
double time_sqrsum = 0, time_sum = 0, |
|
|
|
min_time = std::numeric_limits<double>::max(), max_time = 0; |
|
|
|
for (int run = 0; run < env.nr_run; ++run) { |
|
|
|
mgb_log_debug("load_and_run: before running iter %d", run); |
|
|
|
timer.reset(); |
|
|
|
func->execute(); |
|
|
|
mgb_log_debug("load_and_run: before waiting iter %d", run); |
|
|
|
auto exec_time = timer.get_msecs(); |
|
|
|
func->wait(); |
|
|
|
output_dumper.write_to_file(); |
|
|
|
auto cur = timer.get_msecs(); |
|
|
|
printf("iter %d/%d: %.3fms (exec=%.3f,device=%.3f)\n", run, |
|
|
|
env.nr_run, cur, exec_time, |
|
|
|
func->get_prev_exec_time() * 1e3); |
|
|
|
time_sum += cur; |
|
|
|
time_sqrsum += cur * cur; |
|
|
|
fflush(stdout); |
|
|
|
if (cur < min_time) { |
|
|
|
min_time = cur; |
|
|
|
} |
|
|
|
if (cur > max_time) { |
|
|
|
max_time = cur; |
|
|
|
} |
|
|
|
} |
|
|
|
printf("=== finished test #%u: time=%.3fms avg_time=%.3fms " |
|
|
|
"sd=%.3fms minmax=%.3f,%.3f\n\n", |
|
|
|
case_idx, time_sum, time_sum / env.nr_run, |
|
|
|
std::sqrt((time_sqrsum * env.nr_run - time_sum * time_sum) / |
|
|
|
(env.nr_run * (env.nr_run - 1))), |
|
|
|
min_time, max_time); |
|
|
|
return time_sum; |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
if (nr_test) { |
|
|
|
// run testcase, generated by dump_with_testcase.py |
|
|
|
|
|
|
@@ -742,37 +777,7 @@ void run_test_st(Args &env) { |
|
|
|
if (!env.nr_run) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
double time_sqrsum = 0, time_sum = 0, |
|
|
|
min_time = std::numeric_limits<double>::max(), max_time = 0; |
|
|
|
for (int run = 0; run < env.nr_run; ++ run) { |
|
|
|
mgb_log_debug("load_and_run: before running iter %d", run); |
|
|
|
timer.reset(); |
|
|
|
func->execute(); |
|
|
|
mgb_log_debug("load_and_run: before waiting iter %d", run); |
|
|
|
auto exec_time = timer.get_msecs(); |
|
|
|
func->wait(); |
|
|
|
output_dumper.write_to_file(); |
|
|
|
auto cur = timer.get_msecs(); |
|
|
|
printf("iter %d/%d: %.3fms (exec=%.3f,device=%.3f)\n", run, |
|
|
|
env.nr_run, cur, exec_time, |
|
|
|
func->get_prev_exec_time() * 1e3); |
|
|
|
time_sum += cur; |
|
|
|
time_sqrsum += cur * cur; |
|
|
|
fflush(stdout); |
|
|
|
if (cur < min_time) { |
|
|
|
min_time = cur; |
|
|
|
} |
|
|
|
if (cur > max_time) { |
|
|
|
max_time = cur; |
|
|
|
} |
|
|
|
} |
|
|
|
tot_time += time_sum; |
|
|
|
printf("=== finished test #%u: time=%.3fms avg_time=%.3fms " |
|
|
|
"sd=%.3fms minmax=%.3f,%.3f\n\n", |
|
|
|
i, time_sum, time_sum / env.nr_run, |
|
|
|
std::sqrt((time_sqrsum * env.nr_run - time_sum * time_sum) / |
|
|
|
(env.nr_run * (env.nr_run - 1))), |
|
|
|
min_time, max_time); |
|
|
|
tot_time += run_iters(i); |
|
|
|
} |
|
|
|
|
|
|
|
printf("=== total time: %.3fms\n", tot_time); |
|
|
@@ -793,15 +798,10 @@ void run_test_st(Args &env) { |
|
|
|
in->copy_from(i.second); |
|
|
|
} |
|
|
|
|
|
|
|
warmup(); |
|
|
|
timer.reset(); |
|
|
|
func->execute(); |
|
|
|
auto exec_time = timer.get_msecs(); |
|
|
|
func->wait(); |
|
|
|
output_dumper.write_to_file(); |
|
|
|
auto cur = timer.get_msecs(); |
|
|
|
printf("%.3fms %.3fms (device=%.3f)\n", cur, exec_time, |
|
|
|
func->get_prev_exec_time() * 1e3); |
|
|
|
|
|
|
|
printf("=== going to run input for %d times\n", env.nr_run); |
|
|
|
run_iters(0); |
|
|
|
} else { |
|
|
|
// run speed test for a raw mgb graph |
|
|
|
mgb_assert(env.load_ret.tensor_map.empty(), |
|
|
|