Skip to content

Commit f649efe

Browse files
committed
Add min rel accuracy stopping criterion
Clean up the initial commit. Further cleaning of initial commit. Add test. Improvements to comments thanks to review. Reformat thanks to clang-format. Static cast to avoid conversion warning.
1 parent 192ef10 commit f649efe

11 files changed

+224
-20
lines changed

include/benchmark/benchmark.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,12 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
127127
}
128128
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
129129
130-
Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
131-
benchmark. This option overrides the `benchmark_min_time` flag.
130+
Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how
131+
long to run the benchmark. This option overrides the `benchmark_min_time` flag.
132+
133+
If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)`
134+
to set the required minimum relative accuracy used to determine how long to run
135+
the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag.
132136
133137
void BM_test(benchmark::State& state) {
134138
... body ...
@@ -1230,11 +1234,21 @@ class BENCHMARK_EXPORT Benchmark {
12301234
// multiplier kRangeMultiplier will be used.
12311235
Benchmark* RangeMultiplier(int multiplier);
12321236

1233-
// Set the minimum amount of time to use when running this benchmark. This
1234-
// option overrides the `benchmark_min_time` flag.
1237+
// Set the minimum amount of time to use to determine the required number
1238+
// of iterations when running this benchmark. This option overrides
1239+
// the `benchmark_min_time` flag.
12351240
// REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
12361241
Benchmark* MinTime(double t);
12371242

1243+
// Set the minimum relative accuracy to use to determine the required number
1244+
// of iterations when running this benchmark. This option overrides
1245+
// the `benchmark_min_rel_accuracy` flag.
1246+
// REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and
1247+
// time is measured manually, i.e., `UseManualTime` has been called on this
1248+
// benchmark and each benchmark iteration should call
1249+
// `SetIterationTime(seconds)` to report the measured time.
1250+
Benchmark* MinRelAccuracy(double r);
1251+
12381252
// Set the minimum amount of time to run the benchmark before taking runtimes
12391253
// of this benchmark into account. This
12401254
// option overrides the `benchmark_min_warmup_time` flag.
@@ -1360,6 +1374,7 @@ class BENCHMARK_EXPORT Benchmark {
13601374

13611375
int range_multiplier_;
13621376
double min_time_;
1377+
double min_rel_accuracy_;
13631378
double min_warmup_time_;
13641379
IterationCount iterations_;
13651380
int repetitions_;
@@ -1805,6 +1820,7 @@ struct BENCHMARK_EXPORT BenchmarkName {
18051820
std::string function_name;
18061821
std::string args;
18071822
std::string min_time;
1823+
std::string min_rel_accuracy;
18081824
std::string min_warmup_time;
18091825
std::string iterations;
18101826
std::string repetitions;
@@ -1844,6 +1860,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
18441860
threads(1),
18451861
time_unit(GetDefaultTimeUnit()),
18461862
real_accumulated_time(0),
1863+
manual_accumulated_time_pow2(0),
18471864
cpu_accumulated_time(0),
18481865
max_heapbytes_used(0),
18491866
use_real_time_for_initial_big_o(false),
@@ -1872,6 +1889,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
18721889
int64_t repetitions;
18731890
TimeUnit time_unit;
18741891
double real_accumulated_time;
1892+
double manual_accumulated_time_pow2;
18751893
double cpu_accumulated_time;
18761894

18771895
// Return a value representing the real time per iteration in the unit

src/benchmark.cc

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,12 @@ BM_DEFINE_bool(benchmark_list_tests, false);
6868
// linked into the binary are run.
6969
BM_DEFINE_string(benchmark_filter, "");
7070

71-
// Specification of how long to run the benchmark.
71+
// Specification of either an exact number of iterations (specified as
72+
// `<integer>x`) or a minimum number of seconds (specified as `<float>s`) used
73+
// to determine how long to run the benchmark.
7274
//
73-
// It can be either an exact number of iterations (specified as `<integer>x`),
74-
// or a minimum number of seconds (specified as `<float>s`). If the latter
75-
// format (ie., min seconds) is used, the system may run the benchmark longer
76-
// until the results are considered significant.
75+
// If the latter format (i.e., min seconds) is used, the system may run
76+
// the benchmark longer until the results are considered significant.
7777
//
7878
// For backward compatibility, the `s` suffix may be omitted, in which case,
7979
// the specified number is interpreted as the number of seconds.
@@ -84,6 +84,19 @@ BM_DEFINE_string(benchmark_filter, "");
8484
// benchmark execution, regardless of number of threads.
8585
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
8686

87+
// Specification of required relative accuracy used to determine how
88+
// long to run the benchmark.
89+
//
90+
// REQUIRES: time is measured manually.
91+
//
92+
// Manual timers provide per-iteration times. The relative accuracy is
93+
// measured as the standard deviation of these per-iteration times divided by
94+
// the product of the mean and the square root of the number of iterations. The benchmark is
95+
// run until both of the following conditions are fulfilled:
96+
// 1. the specified minimum time or number of iterations is reached
97+
// 2. the measured relative accuracy meets the specified requirement
98+
BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0);
99+
87100
// Minimum number of seconds a benchmark should be run before results should be
88101
// taken into account. This can be necessary, e.g., for benchmarks of code which
89102
// needs to fill some form of cache before performance is of interest.
@@ -97,7 +110,7 @@ BM_DEFINE_int32(benchmark_repetitions, 1);
97110

98111
// If enabled, forces each benchmark to execute exactly one iteration and one
99112
// repetition, bypassing any configured
100-
// MinTime()/MinWarmUpTime()/Iterations()/Repetitions()
113+
// MinTime()/MinRelAccuracy()/MinWarmUpTime()/Iterations()/Repetitions()
101114
BM_DEFINE_bool(benchmark_dry_run, false);
102115

103116
// If set, enable random interleaving of repetitions of all benchmarks.
@@ -759,6 +772,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
759772
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
760773
ParseStringFlag(argv[i], "benchmark_min_time",
761774
&FLAGS_benchmark_min_time) ||
775+
ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy",
776+
&FLAGS_benchmark_min_rel_accuracy) ||
762777
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
763778
&FLAGS_benchmark_min_warmup_time) ||
764779
ParseInt32Flag(argv[i], "benchmark_repetitions",
@@ -877,7 +892,8 @@ void PrintDefaultHelp() {
877892
"benchmark"
878893
" [--benchmark_list_tests={true|false}]\n"
879894
" [--benchmark_filter=<regex>]\n"
880-
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
895+
" [--benchmark_min_time=`<integer>x` OR `<float>s`]\n"
896+
" [--benchmark_min_rel_accuracy=<min_rel_accuracy>]\n"
881897
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
882898
" [--benchmark_repetitions=<num_repetitions>]\n"
883899
" [--benchmark_dry_run={true|false}]\n"

src/benchmark_api_internal.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ BenchmarkInstance::BenchmarkInstance(benchmark::Benchmark* benchmark,
2626
statistics_(benchmark_.statistics_),
2727
repetitions_(benchmark_.repetitions_),
2828
min_time_(benchmark_.min_time_),
29+
min_rel_accuracy_(benchmark_.min_rel_accuracy_),
2930
min_warmup_time_(benchmark_.min_warmup_time_),
3031
iterations_(benchmark_.iterations_),
3132
threads_(thread_count),
@@ -54,6 +55,11 @@ BenchmarkInstance::BenchmarkInstance(benchmark::Benchmark* benchmark,
5455
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
5556
}
5657

58+
if (!IsZero(benchmark->min_rel_accuracy_)) {
59+
name_.min_rel_accuracy =
60+
StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_);
61+
}
62+
5763
if (!IsZero(benchmark->min_warmup_time_)) {
5864
name_.min_warmup_time =
5965
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);

src/benchmark_api_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class BenchmarkInstance {
3636
const std::vector<Statistics>& statistics() const { return statistics_; }
3737
int repetitions() const { return repetitions_; }
3838
double min_time() const { return min_time_; }
39+
double min_rel_accuracy() const { return min_rel_accuracy_; }
3940
double min_warmup_time() const { return min_warmup_time_; }
4041
IterationCount iterations() const { return iterations_; }
4142
int threads() const { return threads_; }
@@ -67,6 +68,7 @@ class BenchmarkInstance {
6768
const std::vector<Statistics>& statistics_;
6869
int repetitions_;
6970
double min_time_;
71+
double min_rel_accuracy_;
7072
double min_warmup_time_;
7173
IterationCount iterations_;
7274
int threads_; // Number of concurrent threads to use

src/benchmark_register.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ Benchmark::Benchmark(const std::string& name)
221221
use_default_time_unit_(true),
222222
range_multiplier_(kRangeMultiplier),
223223
min_time_(0),
224+
min_rel_accuracy_(0),
224225
min_warmup_time_(0),
225226
iterations_(0),
226227
repetitions_(0),
@@ -377,6 +378,14 @@ Benchmark* Benchmark::MinTime(double t) {
377378
return this;
378379
}
379380

381+
Benchmark* Benchmark::MinRelAccuracy(double r) {
  // Preconditions: the requested accuracy must be strictly positive, no
  // explicit iteration count may have been set on this benchmark, and manual
  // timing must already be enabled on it.
  BM_CHECK(r > 0.0);
  BM_CHECK(iterations_ == 0);
  BM_CHECK(use_manual_time_);

  // Remember the per-benchmark requirement and return `this` so that calls
  // can be chained.
  min_rel_accuracy_ = r;
  return this;
}
388+
380389
Benchmark* Benchmark::MinWarmUpTime(double t) {
381390
BM_CHECK(t >= 0.0);
382391
BM_CHECK(iterations_ == 0);

src/benchmark_runner.cc

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ namespace benchmark {
6060

6161
BM_DECLARE_bool(benchmark_dry_run);
6262
BM_DECLARE_string(benchmark_min_time);
63+
BM_DECLARE_double(benchmark_min_rel_accuracy);
6364
BM_DECLARE_double(benchmark_min_warmup_time);
6465
BM_DECLARE_int32(benchmark_repetitions);
6566
BM_DECLARE_bool(benchmark_report_aggregates_only);
@@ -103,6 +104,7 @@ BenchmarkReporter::Run CreateRunReport(
103104
if (report.skipped == 0u) {
104105
if (b.use_manual_time()) {
105106
report.real_accumulated_time = results.manual_time_used;
107+
report.manual_accumulated_time_pow2 = results.manual_time_used_pow2;
106108
} else {
107109
report.real_accumulated_time = results.real_time_used;
108110
}
@@ -159,6 +161,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
159161
results.cpu_time_used += timer.cpu_time_used();
160162
results.real_time_used += timer.real_time_used();
161163
results.manual_time_used += timer.manual_time_used();
164+
results.manual_time_used_pow2 += timer.manual_time_used_pow2();
162165
results.complexity_n += st.complexity_length_n();
163166
internal::Increment(&results.counters, st.counters);
164167
}
@@ -286,6 +289,11 @@ BenchmarkRunner::BenchmarkRunner(
286289
min_time(FLAGS_benchmark_dry_run
287290
? 0
288291
: ComputeMinTime(b_, parsed_benchtime_flag)),
292+
min_rel_accuracy(FLAGS_benchmark_dry_run
293+
? std::numeric_limits<double>::max()
294+
: (!IsZero(b.min_rel_accuracy())
295+
? b.min_rel_accuracy()
296+
: FLAGS_benchmark_min_rel_accuracy)),
289297
min_warmup_time(
290298
FLAGS_benchmark_dry_run
291299
? 0
@@ -356,8 +364,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
356364

357365
// Base decisions off of real time if requested by this benchmark.
358366
i.seconds = i.results.cpu_time_used;
367+
i.seconds_pow2 = 0;
359368
if (b.use_manual_time()) {
360369
i.seconds = i.results.manual_time_used;
370+
i.seconds_pow2 = i.results.manual_time_used_pow2;
361371
} else if (b.use_real_time()) {
362372
i.seconds = i.results.real_time_used;
363373
}
@@ -378,6 +388,11 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded(
378388
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
379389
multiplier = is_significant ? multiplier : 10.0;
380390

391+
if (!IsZero(GetMinRelAccuracy())) {
392+
multiplier =
393+
std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy());
394+
}
395+
381396
// So what seems to be the sufficiently-large iteration count? Round up.
382397
const IterationCount max_next_iters = static_cast<IterationCount>(
383398
std::llround(std::max(multiplier * static_cast<double>(i.iters),
@@ -395,14 +410,12 @@ bool BenchmarkRunner::ShouldReportIterationResults(
395410
// Either it has run for a sufficient amount of time
396411
// or because an error was reported.
397412
return (i.results.skipped_ != 0u) || FLAGS_benchmark_dry_run ||
398-
i.iters >= kMaxIterations || // Too many iterations already.
399-
i.seconds >=
400-
GetMinTimeToApply() || // The elapsed time is large enough.
401-
// CPU time is specified but the elapsed real time greatly exceeds
402-
// the minimum time.
403-
// Note that user provided timers are except from this test.
404-
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
405-
!b.use_manual_time());
413+
// Too many iterations already.
414+
i.iters >= kMaxIterations ||
415+
// We have applied for enough time and the relative accuracy is good
416+
// enough. Relative accuracy is checked only for user provided timers.
417+
(HasSufficientTimeToApply(i) &&
418+
(!b.use_manual_time() || HasSufficientRelAccuracy(i)));
406419
}
407420

408421
double BenchmarkRunner::GetMinTimeToApply() const {
@@ -414,6 +427,28 @@ double BenchmarkRunner::GetMinTimeToApply() const {
414427
return warmup_done ? min_time : min_warmup_time;
415428
}
416429

430+
double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const {
431+
return std::sqrt(i.seconds_pow2 -
432+
std::pow(i.seconds, 2.) / static_cast<double>(i.iters)) /
433+
i.seconds;
434+
}
435+
436+
bool BenchmarkRunner::HasSufficientTimeToApply(
437+
const IterationResults& i) const {
438+
return i.seconds >= GetMinTimeToApply() ||
439+
// CPU time is specified but the elapsed real time greatly exceeds
440+
// the minimum time.
441+
// Note that user provided timers are except from this test.
442+
(!b.use_manual_time() &&
443+
i.results.real_time_used >= 5 * GetMinTimeToApply());
444+
}
445+
446+
bool BenchmarkRunner::HasSufficientRelAccuracy(
447+
const IterationResults& i) const {
448+
return (IsZero(GetMinRelAccuracy()) ||
449+
((GetRelAccuracy(i) <= GetMinRelAccuracy()) && (i.iters >= 2)));
450+
}
451+
417452
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
418453
warmup_done = true;
419454
iters = i;

src/benchmark_runner.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ class BenchmarkRunner {
7171

7272
double GetMinTime() const { return min_time; }
7373

74+
double GetMinRelAccuracy() const { return min_rel_accuracy; }
75+
7476
bool HasExplicitIters() const { return has_explicit_iteration_count; }
7577

7678
IterationCount GetIters() const { return iters; }
@@ -83,6 +85,7 @@ class BenchmarkRunner {
8385

8486
BenchTimeType parsed_benchtime_flag;
8587
const double min_time;
88+
const double min_rel_accuracy;
8689
const double min_warmup_time;
8790
bool warmup_done;
8891
const int repeats;
@@ -102,6 +105,7 @@ class BenchmarkRunner {
102105
internal::ThreadManager::Result results;
103106
IterationCount iters;
104107
double seconds;
108+
double seconds_pow2;
105109
};
106110
IterationResults DoNIterations();
107111

@@ -115,6 +119,12 @@ class BenchmarkRunner {
115119

116120
double GetMinTimeToApply() const;
117121

122+
double GetRelAccuracy(const IterationResults& i) const;
123+
124+
bool HasSufficientTimeToApply(const IterationResults& i) const;
125+
126+
bool HasSufficientRelAccuracy(const IterationResults& i) const;
127+
118128
void FinishWarmUp(const IterationCount& i);
119129

120130
void RunWarmUp();

src/thread_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class ThreadManager {
2626
double real_time_used = 0;
2727
double cpu_time_used = 0;
2828
double manual_time_used = 0;
29+
double manual_time_used_pow2 = 0;
2930
int64_t complexity_n = 0;
3031
std::string report_label_;
3132
std::string skip_message_;

src/thread_timer.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ class ThreadTimer {
3838
}
3939

4040
// Called by each thread
41-
void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
41+
void SetIterationTime(double seconds) {
  // Accumulate the manually reported time of this iteration ...
  manual_time_used_ += seconds;
  // ... and its square. A plain multiplication is used instead of
  // std::pow(seconds, 2.) so that this per-iteration path does not go through
  // a general-purpose math routine and does not depend on <cmath>.
  manual_time_used_pow2_ += seconds * seconds;
}
4245

4346
bool running() const { return running_; }
4447

@@ -60,6 +63,11 @@ class ThreadTimer {
6063
return manual_time_used_;
6164
}
6265

66+
double manual_time_used_pow2() const {
67+
BM_CHECK(!running_);
68+
return manual_time_used_pow2_;
69+
}
70+
6371
private:
6472
double ReadCpuTimerOfChoice() const {
6573
if (measure_process_cpu_time) return ProcessCPUUsage();
@@ -78,6 +86,7 @@ class ThreadTimer {
7886
double cpu_time_used_ = 0;
7987
// Manually set iteration time. User sets this with SetIterationTime(seconds).
8088
double manual_time_used_ = 0;
89+
double manual_time_used_pow2_ = 0;
8190
};
8291

8392
} // namespace internal

test/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_
109109
compile_benchmark_test(benchmark_min_time_flag_iters_test)
110110
benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)
111111

112+
compile_benchmark_test(benchmark_min_rel_accuracy_flag_test)
113+
benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test)
114+
112115
add_filter_test(filter_simple "Foo" 3)
113116
add_filter_test(filter_simple_negative "-Foo" 2)
114117
add_filter_test(filter_suffix "BM_.*" 4)

0 commit comments

Comments
 (0)