Branch data Line data Source code
1 : : // Copyright (C) 2010 Internet Systems Consortium, Inc. ("ISC")
2 : : //
3 : : // Permission to use, copy, modify, and/or distribute this software for any
4 : : // purpose with or without fee is hereby granted, provided that the above
5 : : // copyright notice and this permission notice appear in all copies.
6 : : //
7 : : // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
8 : : // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
9 : : // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
10 : : // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
11 : : // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
12 : : // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
13 : : // PERFORMANCE OF THIS SOFTWARE.
14 : :
15 : : #ifndef __BENCHMARK_H
16 : : #define __BENCHMARK_H 1
17 : :
18 : : #include <sys/time.h>
19 : :
20 : : #include <cassert>
21 : : #include <iostream>
22 : : #include <ios>
23 : :
24 : : namespace isc {
25 : : namespace bench {
26 : :
27 : : /// \brief Templated micro benchmark framework.
28 : : ///
29 : : /// "Premature optimization is the root of all evil."
30 : : /// But programmers are often tempted to focus on performance optimization
31 : : /// too early.
32 : : /// Likewise, it's not uncommon for an engineer to introduce a minor
33 : : /// optimization with a lot of complicated code logic that actually improves
34 : : /// performance only marginally.
35 : : /// Making benchmark easier will help avoid such common pitfalls.
36 : : /// Of course, it also helps when we really need to introduce optimization
37 : : /// to identify where is the bottleneck and see how a particular optimization
38 : : /// improves the performance.
39 : : ///
40 : : /// The BenchMark class template provides a simple framework for so-called
41 : : /// "stopwatch" micro benchmarking.
42 : : /// It just encapsulates the common operations for this type of benchmark:
43 : : /// repeat a specified operation for a given number of times,
44 : : /// record the start and end times of the operations,
45 : : /// and provide major accumulated results such as the number of iterations
46 : : /// per second.
47 : : /// The main goal of this class is to help developers write benchmark test
48 : : /// cases with fewer strokes of typing.
49 : : ///
50 : : /// The constructors of a \c BenchMark class is to take the number of
51 : : /// iterations (which is referred to as \c niter below)
52 : : /// and an object (or reference to it) of the type of the template
53 : : /// parameter, \c T. Class \c T implements the benchmark target code via
54 : : /// its \c run() method, whose signature is as follows:
55 : : /// \code unsigned int T::run(); \endcode
56 : : ///
57 : : /// A BenchMark class object, via its own \c run() method, calls \c T::run()
58 : : /// for \c niter times.
59 : : /// In the simplest form \c T::run() would perform a single operation to
60 : : /// be benchmarked and returns 1.
61 : : /// In some cases, however, the operation is very lightweight (e.g. performing
62 : : /// a binary search on a moderate length of integer vector), and it may be
63 : : /// desirable to have an internal iterations within \c T::run() to avoid
64 : : /// the overhead of function calls to \c T::run().
65 : : /// In that case, \c T::run() would return the number of internal iterations
66 : : /// instead of 1.
67 : : ///
68 : : /// The \c BenchMark::run() method records some statistics %data on the
69 : : /// benchmarking, including the start and end times and the total number of
70 : : /// iterations (which is the sum of the return value of \c T::run(), and,
71 : : /// is equal to \c niter in the simplest case where \c T::run() always
72 : : /// returns 1).
73 : : /// This %data can be retried via other methods of \c BenchMark, but in
74 : : /// the primarily intended use case the \c BenchMark object would calls its
75 : : /// \c run() method at the end of its construction, and prints summarized
76 : : /// statistics to the standard output.
77 : : /// This way, the developer can only write a single line of code to perform
78 : : /// everything other than the benchmark target code (see the example below).
79 : : ///
80 : : /// \b Example
81 : : ///
82 : : /// Suppose that we want to measure performance of the search (find)
83 : : /// operation on STL set objects. We'd first define the implementation
84 : : /// class (to be the template parameter of the \c BenchMark class) as follows:
85 : : ///
86 : : /// \code class SetSearchBenchMark {
87 : : /// public:
88 : : /// SetSearchBenchMark(const set<int>& data, const vector<int>& keys) :
89 : : /// data_(data), keys_(keys)
90 : : /// {}
91 : : /// unsigned int run() {
92 : : /// vector<int>::const_iterator iter;
93 : : /// vector<int>::const_iterator end_key = keys_.end();
94 : : /// for (iter = keys_.begin(); iter != end_key; ++iter) {
95 : : /// data_.find(*iter);
96 : : /// }
97 : : /// return (keys_.size());
98 : : /// }
99 : : /// const set<int>& data_;
100 : : /// const vector<int>& keys_;
101 : : /// }; \endcode
102 : : ///
103 : : /// In its constructor the \c SetSearchBenchMark class takes a set of
104 : : /// integers (\c %data) and a vector of integers (\c keys). \c %data is
105 : : /// the STL set to be searched, and \c keys stores the search keys.
106 : : /// The constructor keeps local references to these objects.
107 : : ///
108 : : /// The \c SetSearchBenchMark::run() method, which is called via
109 : : /// \c BenchMark::run(), iterates over the key vector, and performs the
110 : : /// \c find() method of the set class for each key.
111 : : /// (This is only for performance measurement, so the result is ignored).
112 : : /// Note that this \c run() method has its own internal iterations.
113 : : /// This is because each invocation of \c find() would be pretty lightweight,
114 : : /// and the function call overhead may be relatively heavier.
115 : : /// Due to the internal iterations, this method returns the number of
116 : : /// \c find() calls, which is equal to the size of the key vector.
117 : : ///
118 : : /// Next, we should prepare test %data. In this simple example, let's assume
119 : : /// we use a fixed size: a set of 10,000 integers (from 0 to 9999), and use
120 : : /// the same number of search keys randomly chosen from that range:
121 : : /// \code
122 : : /// set<int> data_set;
123 : : /// vector<int> keys;
124 : : /// for (int i = 0; i < 10000; ++i) {
125 : : /// data_set.insert(i);
126 : : /// keys.push_back(rand() % 10000);
127 : : /// } \endcode
128 : : ///
129 : : /// Then construct a \c BenchMark<SetSearchBenchMark> object with the
130 : : /// test %data:
131 : : /// \code
132 : : /// BenchMark<SetSearchBenchMark>(100, SetSearchBenchMark(data_set, keys));
133 : : /// \endcode
134 : : /// Here we specify 100 for the number of iterations, which would cause
135 : : /// 1 million search attempts in total.
136 : : ///
137 : : /// That's it. If we put these in a C++ source file with usual necessary
138 : : /// stuff (such as \c %main()), compile it, and run the executable, then
139 : : /// we'll see something like this:
140 : : ///
141 : : /// \code Performed 1000000 iterations in 0.180172s (5550251.98ips) \endcode
142 : : ///
143 : : /// It should be obvious what this means (ips stands for "iterations
144 : : /// per second").
145 : : ///
146 : : /// A complete example program of this measurement scenario (with some
147 : : /// additional test cases and configurable parameters) can be found in
148 : : /// example/search_bench.cc.
149 : : ///
150 : : /// \b Customization
151 : : ///
152 : : /// The above simple usage should be sufficient in many benchmark cases,
153 : : /// but the \c BenchMark class provides some customization points by
154 : : /// specializing some of its (templated) public methods.
155 : : /// For example, assume you want to customize the output of benchmark result.
156 : : /// It can be done by specializing \c BenchMark::printResult():
157 : : /// \code namespace isc {
158 : : /// namespace bench {
159 : : /// template<>
160 : : /// void
161 : : /// BenchMark<SetSearchBenchMark>::printResult() const {
162 : : /// cout << "Searched for " << target_.keys_.size() << " keys "
163 : : /// << getIteration() << " times in " << getDuration() << "s" << endl;
164 : : /// }
165 : : /// }
166 : : /// } \endcode
167 : : ///
168 : : /// Then the Result would be something like this:
169 : : ///
170 : : /// \code Searched for 10000 keys 1000000 times in 0.21s \endcode
171 : : ///
172 : : /// Note that the specialization must be defined in the same namespace as
173 : : /// that of the \c BenchMark class, that is, \c isc::bench.
174 : : /// It should also be noted that the corresponding \c SetSearchBenchMark
175 : : /// object can be accessed (through its public interfaces) via the \c target_
176 : : /// member variable of \c BenchMark.
177 : : ///
178 : : /// <b>Future Plans and Compatibility Notes</b>
179 : : ///
180 : : /// Currently, benchmark developers need to write supplemental code that is
181 : : /// not directly related to benchmarks (such as \c %main()) by hand.
182 : : /// It would be better if we could minimize such development overhead.
183 : : /// In future versions we may provide a common \c %main() function and
184 : : /// option parsers, thereby allowing the developer to only write the benchmark
185 : : /// classes and invoke them, just like what various unit test frameworks do.
186 : : ///
187 : : /// If and when we implement it, some existing benchmark cases may need to be
188 : : /// adjusted.
189 : : template <typename T>
190 : : class BenchMark {
191 : : ///
192 : : /// \name Constructors
193 : : ///
194 : : /// Note: The copy constructor and the assignment operator are
195 : : /// intentionally defined as private, making this class non-copyable.
196 : : /// We use the default destructor.
197 : : //@{
198 : : private:
199 : : BenchMark(const BenchMark& source);
200 : : BenchMark& operator=(const BenchMark& source);
201 : : public:
202 : : /// \brief Constructor for immediate run.
203 : : ///
204 : : /// This is the constructor that is expected to be used normally.
205 : : /// It runs the benchmark within the constructor and prints the result,
206 : : /// thereby making it possible to do everything with a single line of
207 : : /// code (see the above example).
208 : : ///
209 : : /// \param iterations The number of iterations. The \c run() method will
210 : : /// be called this number of times.
211 : : /// \param target The templated class object that
212 : : /// implements the code to be benchmarked.
213 : : BenchMark(const int iterations, T target) :
214 : : iterations_(iterations), sub_iterations_(0), target_(NULL)
215 : : {
216 : : initialize(target, true);
217 : : }
218 : :
219 : : /// \brief Constructor for finer-grained control.
220 : : ///
221 : : /// This constructor takes the third parameter, \c immediate, to control
222 : : /// whether to run the benchmark within the constructor.
223 : : /// It also takes a reference to the templated class object rather than
224 : : /// an object copy, so if the copy overhead isn't acceptable this
225 : : /// constructor must be used.
226 : : ///
227 : : /// \param iterations The number of iterations. The \c run() method will
228 : : /// be called this number of times.
229 : : /// \param target A reference to the templated class object that
230 : : /// implements the code to be benchmarked.
231 : : /// \param immediate If \c true the benchmark will be performed within
232 : : /// the constructor; otherwise it only does initialization.
233 : 2 : BenchMark(const int iterations, T& target, const bool immediate) :
234 : 2 : iterations_(iterations), sub_iterations_(0), target_(&target)
235 : : {
236 : 2 : initialize(target, immediate);
237 : 2 : }
238 : : //@}
239 : :
240 : : /// \brief Hook to be called before starting benchmark.
241 : : ///
242 : : /// This method will be called from \c run() before starting the benchmark.
243 : : /// By default it's empty, but can be customized via template
244 : : /// specialization. When specialized, a reference to the target object
245 : : /// given to the constructor will be passed to the implementation.
246 : : void setUp(T&) {}
247 : :
248 : : /// \brief Hook to be called after benchmark.
249 : : ///
250 : : /// This method will be called from \c run() when the benchmark completes.
251 : : /// By default it's empty, but can be customized via template
252 : : /// specialization. When specialized, a reference to the target object
253 : : /// given to the constructor will be passed to the implementation.
254 : : void tearDown(T&) {}
255 : :
256 : : /// \brief Perform benchmark.
257 : : ///
258 : : /// This method first calls \c setUp().
259 : : /// It then records the current time, calls \c T::run() for the number
260 : : /// of times specified on construction, and records the time on completion.
261 : : /// Finally, it calls \c tearDown().
262 : 2 : void run() {
263 [ - + ]: 2 : assert(target_ != NULL);
264 : 2 : run(*target_);
265 : 2 : }
266 : :
267 : : /// \brief Print the benchmark result.
268 : : ///
269 : : /// This method prints the benchmark result in a common style to the
270 : : /// standard out. The result contains the number of total iterations,
271 : : /// the duration of the test, and the number of iterations per second
272 : : /// calculated from the previous two parameters.
273 : : ///
274 : : /// A call to this method is only meaningful after the completion of
275 : : /// \c run(). The behavior is undefined in other cases.
276 : : void printResult() const {
277 : : std::cout.precision(6);
278 : 0 : std::cout << "Performed " << getIteration() << " iterations in "
279 : : << std::fixed << getDuration() << "s";
280 : : std::cout.precision(2);
281 : 0 : std::cout << " (" << std::fixed << getIterationPerSecond() << "ips)"
282 : : << std::endl;
283 : : }
284 : :
285 : : /// \brief Return the number of iterations.
286 : : ///
287 : : /// It returns the total iterations of benchmark, which is the sum
288 : : /// of the return value of \c T::run() over all calls to it
289 : : /// (note that it may not equal to the number of calls to \c T::run(),
290 : : /// which was specified on construction of this class).
291 : : ///
292 : : /// A call to this method is only meaningful after the completion of
293 : : /// \c run(). The behavior is undefined in other cases.
294 : 0 : unsigned int getIteration() const { return (sub_iterations_); }
295 : :
296 : : /// \brief Return the duration of benchmark in seconds.
297 : : ///
298 : : /// The highest possible precision of this value is microseconds.
299 : : ///
300 : : /// A call to this method is only meaningful after the completion of
301 : : /// \c run(). The behavior is undefined in other cases.
302 : 0 : double getDuration() const {
303 : : return (tv_diff_.tv_sec +
304 : 0 : static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION);
305 : : }
306 : :
307 : : /// \brief Return the average duration per iteration in seconds.
308 : : ///
309 : : /// The highest possible precision of this value is microseconds.
310 : : /// The iteration is the sum of the return value of \c T::run() over
311 : : /// all calls to it (note that it may not equal to the number of calls
312 : : /// to \c T::run()).
313 : : ///
314 : : /// If it cannot calculate the average, it returns \c TIME_FAILURE.
315 : : ///
316 : : /// A call to this method is only meaningful after the completion of
317 : : /// \c run(). The behavior is undefined in other cases.
318 : : double getAverageTime() const {
319 [ - + + - : 3 : if (sub_iterations_ == 0) {
+ - ]
320 : : return (TIME_FAILURE);
321 : : }
322 : : return ((tv_diff_.tv_sec +
323 : : static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION ) /
324 : 2 : sub_iterations_);
325 : : }
326 : :
327 : : /// \brief Return the number of possible iterations per second based on
328 : : /// the benchmark result.
329 : : ///
330 : : /// If it cannot calculate that number (e.g. because the duration is
331 : : /// too small) it returns \c ITERATION_FAILURE.
332 : : /// A call to this method is only meaningful after the completion of
333 : : /// \c run(). The behavior is undefined in other cases.
334 : : double getIterationPerSecond() const {
335 : : const double duration_usec = tv_diff_.tv_sec +
336 : 2 : static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION;
337 [ # # + - : 2 : if (duration_usec == 0) {
+ - ]
338 : : return (ITERATION_FAILURE);
339 : : }
340 : 2 : return (sub_iterations_ / duration_usec);
341 : : }
342 : : public:
343 : : /// \brief A constant that indicates a failure in \c getAverageTime().
344 : : ///
345 : : /// This constant be used as double but is defined as int so that it can
346 : : /// be initialized within the class definition. Type conversion will be
347 : : /// performed implicitly.
348 : : static const int TIME_FAILURE = -1;
349 : :
350 : : /// \brief A constant that indicates a failure in
351 : : /// \c getIterationPerSecond().
352 : : ///
353 : : /// This constant be used as double but is defined as int so that it can
354 : : /// be initialized within the class definition. Type conversion will be
355 : : /// performed implicitly.
356 : : static const int ITERATION_FAILURE = -1;
357 : : private:
358 : 2 : void run(T& target) {
359 : : setUp(target);
360 : :
361 : : struct timeval beg, end;
362 : 2 : gettimeofday(&beg, NULL);
363 [ + + ]: 4 : for (unsigned int i = 0; i < iterations_; ++i) {
364 : 4 : sub_iterations_ += target.run();
365 : : }
366 : 2 : gettimeofday(&end, NULL);
367 : 2 : tv_diff_ = tv_subtract(end, beg);
368 : :
369 : : tearDown(target);
370 : 2 : }
371 : :
372 : : void initialize(T& target, const bool immediate) {
373 [ - + ]: 2 : if (immediate) {
374 : 0 : run(target);
375 : : printResult();
376 : : }
377 : : }
378 : : private:
379 : : // return t1 - t2
380 : 0 : struct timeval tv_subtract(const struct timeval& t1,
381 : : const struct timeval& t2)
382 : : {
383 : : struct timeval result;
384 : :
385 : 2 : result.tv_sec = t1.tv_sec - t2.tv_sec;
386 [ # # + - ]: 2 : if (t1.tv_usec >= t2.tv_usec) {
387 : 2 : result.tv_usec = t1.tv_usec- t2.tv_usec;
388 : : } else {
389 : 0 : result.tv_usec = ONE_MILLION + t1.tv_usec - t2.tv_usec;
390 : 0 : --result.tv_sec;
391 : : }
392 : :
393 : 0 : return (result);
394 : : }
395 : : private:
396 : : static const int ONE_MILLION = 1000000;
397 : : const unsigned int iterations_;
398 : : unsigned int sub_iterations_;
399 : : T* target_;
400 : : struct timeval tv_diff_;
401 : : };
402 : :
403 : : }
404 : : }
405 : : #endif // __BENCHMARK_H
406 : :
407 : : // Local Variables:
408 : : // mode: c++
409 : : // End:
|