iai_callgrind/lib.rs
//! Iai-Callgrind is a benchmarking framework/harness which primarily uses [Valgrind's
//! Callgrind](https://valgrind.org/docs/manual/cl-manual.html) to provide extremely accurate and
//! consistent measurements of Rust code, making it perfectly suited to run in CI environments.
//! Iai-Callgrind is flexible and, despite its name, it's possible to run Cachegrind or any other
//! Valgrind tool like DHAT in addition to or instead of Callgrind.
//!
//! The [online guide][Guide] contains all the details to start profiling with Iai-Callgrind.
//!
//! # Table of contents
//! - [Characteristics](#characteristics)
//! - [Benchmarking](#benchmarking)
//!     - [Library Benchmarks](#library-benchmarks)
//!         - [Important Default Behavior](#important-default-behavior)
//!         - [Quickstart](#quickstart-library-benchmarks)
//!         - [Configuration](#configuration-library-benchmarks)
//!     - [Binary Benchmarks](#binary-benchmarks)
//!         - [Important default behavior](#important-default-behavior)
//!         - [Quickstart](#quickstart-binary-benchmarks)
//!         - [Configuration](#configuration-binary-benchmarks)
//! - [Valgrind Tools](#valgrind-tools)
//! - [Client Requests](#client-requests)
//! - [Flamegraphs](#flamegraphs)
//!
//! ## Characteristics
//!
//! - __Precision__: High-precision measurements allow you to reliably detect very small
//!   optimizations of your code
//! - __Consistency__: Iai-Callgrind can take accurate measurements even in virtualized CI
//!   environments
//! - __Performance__: Since Iai-Callgrind only executes a benchmark once, it is typically a lot
//!   faster to run than benchmarks measuring the execution and wall-clock time
//! - __Regression__: Iai-Callgrind reports the difference between benchmark runs to make it easy to
//!   spot detailed performance regressions and improvements.
//! - __CPU and Cache Profiling__: Iai-Callgrind generates a Callgrind profile of your code while
//!   benchmarking, so you can use Callgrind-compatible tools like
//!   [callgrind_annotate](https://valgrind.org/docs/manual/cl-manual.html#cl-manual.callgrind_annotate-options)
//!   or the visualizer [kcachegrind](https://kcachegrind.github.io/html/Home.html) to analyze the
//!   results in detail.
//! - __Memory Profiling__: You can run other Valgrind tools like [DHAT: a dynamic heap analysis tool](https://valgrind.org/docs/manual/dh-manual.html)
//!   and [Massif: a heap profiler](https://valgrind.org/docs/manual/ms-manual.html) with the
//!   Iai-Callgrind benchmarking framework. Their profiles are stored next to the callgrind profiles
//!   and are ready to be examined with analyzing tools like `dh_view.html`, `ms_print` and others.
//! - __Visualization__: Iai-Callgrind is capable of creating regular and differential flamegraphs
//!   from the Callgrind output format.
//! - __Valgrind Client Requests__: Support of zero overhead [Valgrind Client Requests](https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq)
//!   (compared to the overhead of native Valgrind client requests) on many targets
//! - __Stable-compatible__: Benchmark your code without installing nightly Rust
//!
//! ## Benchmarking
//!
//! `iai-callgrind` can be divided into two sections: benchmarking the library and
//! its public functions, and benchmarking the binaries of a crate.
//!
//! ### Library Benchmarks
//!
//! Use this scheme of the [`main`] macro if you want to benchmark functions of your
//! crate's library.
//!
//! #### Important default behavior
//!
//! The environment variables are cleared before running a library benchmark. See also the
//! Configuration section below if you need to change that behavior.
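//!
//! For instance, if a benchmark needs to see the environment of the calling process, the clearing
//! can be turned off. The sketch below assumes [`LibraryBenchmarkConfig`] provides an `env_clear`
//! method analogous to [`crate::BinaryBenchmarkConfig::env_clear`]; check its docs for the exact
//! name and signature.
//!
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group};
//! use iai_callgrind::{main, LibraryBenchmarkConfig};
//! # #[library_benchmark]
//! # fn some_func() {}
//! # library_benchmark_group!(name = some_group; benchmarks = some_func);
//! # fn main() {
//! main!(
//!     // Assumed method: keep the environment variables instead of clearing them
//!     config = LibraryBenchmarkConfig::default().env_clear(false);
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```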
//!
//! #### Quickstart (#library-benchmarks)
//!
//! ```rust
//! use iai_callgrind::{
//!     library_benchmark, library_benchmark_group, main, LibraryBenchmarkConfig
//! };
//! use std::hint::black_box;
//!
//! // Our function we want to test. Just assume this is a public function in your
//! // library.
//! fn bubble_sort(mut array: Vec<i32>) -> Vec<i32> {
//!     for i in 0..array.len() {
//!         for j in 0..array.len() - i - 1 {
//!             if array[j + 1] < array[j] {
//!                 array.swap(j, j + 1);
//!             }
//!         }
//!     }
//!     array
//! }
//!
//! // This function is used to create a worst case array we want to sort with our
//! // implementation of bubble sort
//! fn setup_worst_case_array(start: i32) -> Vec<i32> {
//!     if start.is_negative() {
//!         (start..0).rev().collect()
//!     } else {
//!         (0..start).rev().collect()
//!     }
//! }
//!
//! // The #[library_benchmark] attribute lets you define a benchmark function which you
//! // can later use in the `library_benchmark_group!` macro.
//! #[library_benchmark]
//! fn bench_bubble_sort_empty() -> Vec<i32> {
//!     // The `black_box` is needed to tell the compiler to not optimize what's inside
//!     // black_box or else the benchmarks might return inaccurate results.
//!     black_box(bubble_sort(black_box(vec![])))
//! }
//!
//! // This benchmark uses the `bench` attribute to set up benchmarks with different
//! // setups. The big advantage is that the setup costs and event counts aren't
//! // attributed to the benchmark (and as opposed to the old api we don't have to deal
//! // with callgrind arguments, toggles, ...)
//! #[library_benchmark]
//! #[bench::empty(vec![])]
//! #[bench::worst_case_6(vec![6, 5, 4, 3, 2, 1])]
//! // Function calls are fine too
//! #[bench::worst_case_4000(setup_worst_case_array(4000))]
//! // The argument of the benchmark function defines the type of the argument from the
//! // `bench` cases.
//! fn bench_bubble_sort(array: Vec<i32>) -> Vec<i32> {
//!     // Note `array` is not put in a `black_box` because that's already done for you.
//!     black_box(bubble_sort(array))
//! }
//!
//! // You can use the `benches` attribute to specify multiple benchmark runs in one go. You can
//! // specify multiple `benches` attributes or mix the `benches` attribute with `bench`
//! // attributes.
//! #[library_benchmark]
//! // This is the simple form. Each `,`-separated element is another benchmark run and is
//! // passed to the benchmarking function as a parameter. So, this is the same as specifying
//! // two `#[bench]` attributes #[bench::multiple_0(vec![1])] and #[bench::multiple_1(vec![5])].
//! #[benches::multiple(vec![1], vec![5])]
//! // You can also use the `args` argument to achieve the same. Using `args` is necessary if you
//! // also want to specify a `config` or `setup` function.
//! #[benches::with_args(args = [vec![1], vec![5]], config = LibraryBenchmarkConfig::default())]
//! // Usually, each element in `args` is passed directly to the benchmarking function. You can
//! // instead reroute them to a `setup` function. In that case the (black boxed) return value of
//! // the setup function is passed as a parameter to the benchmarking function.
//! #[benches::with_setup(args = [1, 5], setup = setup_worst_case_array)]
//! fn bench_bubble_sort_with_benches_attribute(input: Vec<i32>) -> Vec<i32> {
//!     black_box(bubble_sort(input))
//! }
//!
//! // A benchmarking function with multiple parameters requires the elements to be specified as
//! // tuples.
//! #[library_benchmark]
//! #[benches::multiple((1, 2), (3, 4))]
//! fn bench_bubble_sort_with_multiple_parameters(a: i32, b: i32) -> Vec<i32> {
//!     black_box(bubble_sort(black_box(vec![a, b])))
//! }
//!
//! // A group in which we can put all our benchmark functions
//! library_benchmark_group!(
//!     name = bubble_sort_group;
//!     benchmarks =
//!         bench_bubble_sort_empty,
//!         bench_bubble_sort,
//!         bench_bubble_sort_with_benches_attribute,
//!         bench_bubble_sort_with_multiple_parameters
//! );
//!
//! # fn main() {
//! // Finally, the mandatory main! macro which collects all `library_benchmark_groups`.
//! // The main! macro creates a benchmarking harness and runs all the benchmarks defined
//! // in the groups and benches.
//! main!(library_benchmark_groups = bubble_sort_group);
//! # }
//! ```
//!
//! Note that it is important to annotate the benchmark functions with
//! [`#[library_benchmark]`](crate::library_benchmark).
//!
//! ### Configuration (#library-benchmarks)
//!
//! It's possible to configure some of the behavior of `iai-callgrind`. See the docs of
//! [`crate::LibraryBenchmarkConfig`] for more details. Configure library benchmarks at
//! top-level with the [`crate::main`] macro, at group level within the
//! [`crate::library_benchmark_group`], at [`crate::library_benchmark`] level
//! and at `bench` level:
//!
//! ```rust
//! # use iai_callgrind::{LibraryBenchmarkConfig, library_benchmark};
//! #[library_benchmark]
//! #[bench::some_id(args = (1, 2), config = LibraryBenchmarkConfig::default())]
//! // ...
//! # fn some_func(first: u8, second: u8) -> u8 {
//! #     first + second
//! # }
//! # fn main() {}
//! ```
//!
//! The config at `bench` level overwrites the config at `library_benchmark` level. The config at
//! `library_benchmark` level overwrites the config at group level and so on. Note that
//! configuration values like `envs` are additive and don't overwrite configuration values of
//! higher levels.
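//!
//! As a sketch of the other levels, a config can also be attached to the group and to the `main!`
//! macro. This assumes the `config = ...;` argument of [`crate::library_benchmark_group`] as
//! described in its docs:
//!
//! ```rust
//! # use iai_callgrind::library_benchmark;
//! use iai_callgrind::{library_benchmark_group, main, LibraryBenchmarkConfig};
//! # #[library_benchmark]
//! # fn some_func() {}
//! library_benchmark_group!(
//!     name = some_group;
//!     // The group-level config applies to all benchmarks of this group unless overwritten
//!     config = LibraryBenchmarkConfig::default();
//!     benchmarks = some_func
//! );
//! # fn main() {
//! main!(
//!     // The top-level config applies to all groups unless overwritten at a lower level
//!     config = LibraryBenchmarkConfig::default();
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```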
//!
//! See also the docs of [`crate::library_benchmark_group`]. The [online guide][Guide] includes more
//! explanations, common recipes and examples.
//!
//! ### Binary Benchmarks
//!
//! Use this scheme of the [`main`] macro to benchmark one or more binaries of your crate (or any
//! other executable). The documentation for setting up binary benchmarks with the
//! `binary_benchmark_group` macro can be found in the docs of [`crate::binary_benchmark_group`].
//!
//! #### Important default behavior
//!
//! By default, all binary benchmarks run with the environment variables cleared. See also
//! [`crate::BinaryBenchmarkConfig::env_clear`] for how to change this behavior.
//!
//! #### Quickstart (#binary-benchmarks)
//!
//! There are two APIs to set up binary benchmarks, but we only describe the high-level API using
//! the [`#[binary_benchmark]`](`crate::binary_benchmark`) attribute here. See the docs of
//! [`binary_benchmark_group`] for more details about the low-level API. The `#[binary_benchmark]`
//! attribute works almost the same as the `#[library_benchmark]` attribute. You will find the same
//! parameters `setup`, `teardown`, `config`, etc. in `#[binary_benchmark]` as in
//! `#[library_benchmark]`, as well as the inner attributes `#[bench]` and `#[benches]`. But there
//! are also substantial [differences](#differences-to-library-benchmarks).
//!
//! Suppose your crate's binaries are named `my-foo` and `my-bar`:
//!
//! ```rust
//! # macro_rules! env { ($m:tt) => {{ "/some/path" }} }
//! use iai_callgrind::{
//!     main, binary_benchmark, binary_benchmark_group,
//! };
//! use std::path::PathBuf;
//! use std::ffi::OsString;
//!
//! // In binary benchmarks there's no need to return a value from the setup function
//! fn my_setup() {
//!     println!("Put code in here which will be run before the actual command");
//! }
//!
//! #[binary_benchmark]
//! #[bench::just_a_fixture("benches/fixture.json")]
//! // First big difference to library benchmarks! `my_setup` is not evaluated right away and the
//! // return value of `my_setup` is not used as input for the `bench_foo` function. Instead,
//! // `my_setup()` is executed before the execution of the `Command`.
//! #[bench::with_other_fixture_and_setup(args = ("benches/other_fixture.txt"), setup = my_setup())]
//! #[benches::multiple("benches/fix_1.txt", "benches/fix_2.txt")]
//! // All functions annotated with `#[binary_benchmark]` need to return an `iai_callgrind::Command`
//! fn bench_foo(path: &str) -> iai_callgrind::Command {
//!     let path: PathBuf = path.into();
//!     // We can put any code in here which is needed to configure the `Command`.
//!     let stdout = if path.extension().unwrap() == "txt" {
//!         iai_callgrind::Stdio::Inherit
//!     } else {
//!         iai_callgrind::Stdio::File(path.with_extension("out"))
//!     };
//!     // Configure the command depending on the arguments passed to this function and the code
//!     // above
//!     iai_callgrind::Command::new(env!("CARGO_BIN_EXE_my-foo"))
//!         .stdout(stdout)
//!         .arg(path)
//!         .build()
//! }
//!
//! #[binary_benchmark]
//! // The id just needs to be unique within the same `#[binary_benchmark]`, so we can reuse
//! // `just_a_fixture` if we want to
//! #[bench::just_a_fixture("benches/fixture.json")]
//! // The function can be generic, too.
//! fn bench_bar<P>(path: P) -> iai_callgrind::Command
//! where
//!     P: Into<OsString>
//! {
//!     iai_callgrind::Command::new(env!("CARGO_BIN_EXE_my-bar"))
//!         .arg(path)
//!         .build()
//! }
//!
//! // Put all `#[binary_benchmark]` annotated functions you want to benchmark into the `benchmarks`
//! // section of this macro
//! binary_benchmark_group!(
//!     name = my_group;
//!     benchmarks = bench_foo, bench_bar
//! );
//!
//! # fn main() {
//! // As last step, specify all groups you want to benchmark in the macro argument
//! // `binary_benchmark_groups`. Like the binary_benchmark_group macro, the main macro is
//! // always needed and finally expands to a benchmarking harness
//! main!(binary_benchmark_groups = my_group);
//! # }
//! ```
//!
//! #### Differences to library benchmarks
//!
//! As opposed to library benchmarks, the function annotated with the `binary_benchmark` attribute
//! always returns an `iai_callgrind::Command`. More specifically, this function is not a benchmark
//! function, since we don't benchmark functions anymore but [`Command`]s instead, which are the
//! return value of the [`#[binary_benchmark]`](crate::binary_benchmark) function.
//!
//! This change has far-reaching consequences but also simplifies things. Since the function itself
//! is not benchmarked you can put any code into this function, and it does not influence the
//! benchmark of the [`Command`] itself. However, this function is run only once to __build__ the
//! [`Command`], when we collect all commands and their configuration to be able to actually
//! __execute__ the [`Command`]s later in the benchmark runner. Whatever code you want to run
//! before the [`Command`] is executed has to go into the `setup`, and into `teardown` for code you
//! want to run after the execution of the [`Command`].
//!
//! In library benchmarks the `setup` argument only takes a path to a function, more specifically
//! the function pointer. In binary benchmarks however, the `setup` (and `teardown`) parameters of
//! the [`#[binary_benchmark]`](crate::binary_benchmark), `#[bench]` and `#[benches]` attributes
//! take expressions, which includes function calls, for example `setup = my_setup()`. Only in the
//! special case that the expression is a function pointer, we pass the `args` of the `#[bench]` and
//! `#[benches]` attributes into the `setup`, `teardown` __and__ the function itself (see the sketch
//! below). Also, these expressions are not executed right away but in a separate process before the
//! [`Command`] is executed. This is the main reason why the return value of the setup function is
//! simply ignored and not routed back into the benchmark function as would be the case in library
//! benchmarks. We simply don't need it. To sum it up, put code you need to configure the
//! [`Command`] into the annotated function, and code you need to execute before (after) the
//! execution of the [`Command`] into `setup` (`teardown`).
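//!
//! To illustrate the function-pointer special case, here is a minimal sketch. The binary name
//! `my-foo` and the helper `create_fixture` are made up for this example:
//!
//! ```rust
//! # macro_rules! env { ($m:tt) => {{ "/some/path" }} }
//! # use iai_callgrind::{binary_benchmark, binary_benchmark_group, main};
//! // A setup function taking the same argument as the benchmark function below
//! fn create_fixture(path: &str) {
//!     println!("Create the fixture {} before the `Command` is executed", path);
//! }
//!
//! #[binary_benchmark]
//! // Because `setup` is a function pointer (not a call), the argument "fixture.txt" is passed to
//! // `create_fixture` AND to `bench_with_fixture`
//! #[bench::fixture(args = ("fixture.txt"), setup = create_fixture)]
//! fn bench_with_fixture(path: &str) -> iai_callgrind::Command {
//!     iai_callgrind::Command::new(env!("CARGO_BIN_EXE_my-foo"))
//!         .arg(path)
//!         .build()
//! }
//! # binary_benchmark_group!(name = my_group; benchmarks = bench_with_fixture);
//! # fn main() {
//! #     main!(binary_benchmark_groups = my_group);
//! # }
//! ```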
//!
//! #### Configuration (#binary-benchmarks)
//!
//! Much like the configuration of library benchmarks (see above), it's possible to configure
//! binary benchmarks at top-level in the `main!` macro and at group level in the
//! `binary_benchmark_group!` with the `config = ...;` argument. In contrast to library benchmarks,
//! binary benchmarks can also be configured at a lower and last level: in the [`Command`] directly.
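//!
//! For example, a minimal sketch of a group-level and a top-level config, using
//! [`crate::BinaryBenchmarkConfig::env_clear`] mentioned above (assumed here to take a `bool`):
//!
//! ```rust
//! # use iai_callgrind::binary_benchmark;
//! use iai_callgrind::{binary_benchmark_group, main, BinaryBenchmarkConfig};
//! # #[binary_benchmark]
//! # fn bench_binary() -> iai_callgrind::Command {
//! #     iai_callgrind::Command::new("/some/path").build()
//! # }
//! binary_benchmark_group!(
//!     name = my_group;
//!     // Applies to all benchmarks of this group unless overwritten at a lower level
//!     config = BinaryBenchmarkConfig::default().env_clear(false);
//!     benchmarks = bench_binary
//! );
//! # fn main() {
//! main!(
//!     // Applies to all groups unless overwritten at a lower level
//!     config = BinaryBenchmarkConfig::default();
//!     binary_benchmark_groups = my_group
//! );
//! # }
//! ```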
//!
//! For further details see the section about binary benchmarks in the [`crate::main`] docs, the
//! docs of [`crate::binary_benchmark_group`] and [`Command`]. The [guide][Guide] of this crate
//! includes more thorough documentation with additional examples.
//!
//! ## Valgrind Tools
//!
//! In addition to or instead of the default Callgrind tool, you can use the Iai-Callgrind framework
//! to run other Valgrind profiling tools like `DHAT`, `Massif`, the experimental `BBV` and even
//! `Cachegrind`, but also `Memcheck`, `Helgrind` and `DRD` if you need to check the memory and
//! thread safety of the benchmarked code. See the [Valgrind User
//! Manual](https://valgrind.org/docs/manual/manual.html) for details and command line arguments.
//! The additional tools can be specified in [`LibraryBenchmarkConfig::tool`] or
//! [`BinaryBenchmarkConfig::tool`]. For example, to run `DHAT` for all library benchmarks:
//!
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group};
//! use iai_callgrind::{main, LibraryBenchmarkConfig, Dhat};
//! # #[library_benchmark]
//! # fn some_func() {}
//! # library_benchmark_group!(name = some_group; benchmarks = some_func);
//! # fn main() {
//! main!(
//!     config = LibraryBenchmarkConfig::default()
//!         .tool(Dhat::default());
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```
//!
//! If you're just interested in, for example, DHAT metrics for one or more specific benchmarks, you
//! can change the default tool wherever a configuration can be specified. Here, in `main!`:
//!
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group};
//! use iai_callgrind::{main, LibraryBenchmarkConfig, ValgrindTool};
//! # #[library_benchmark]
//! # fn some_func() {}
//! # library_benchmark_group!(name = some_group; benchmarks = some_func);
//! # fn main() {
//! main!(
//!     config = LibraryBenchmarkConfig::default()
//!         .default_tool(ValgrindTool::DHAT);
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```
//!
//! ## Client requests
//!
//! `iai-callgrind` supports Valgrind client requests. See the documentation of the
//! [`client_requests`] module for all the details.
//!
//! ## Flamegraphs
//!
//! Flamegraphs are opt-in and can be created if you pass a [`FlamegraphConfig`] to
//! [`Callgrind::flamegraph`]. Callgrind flamegraphs are meant as a complement to Valgrind's
//! visualization tools `callgrind_annotate` and `kcachegrind`.
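//!
//! A minimal sketch of enabling flamegraphs for all library benchmarks, assuming the `Callgrind`
//! tool config can be attached with [`LibraryBenchmarkConfig::tool`] just like the other tool
//! configs in the examples above:
//!
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group};
//! use iai_callgrind::{main, Callgrind, FlamegraphConfig, LibraryBenchmarkConfig};
//! # #[library_benchmark]
//! # fn some_func() {}
//! # library_benchmark_group!(name = some_group; benchmarks = some_func);
//! # fn main() {
//! main!(
//!     config = LibraryBenchmarkConfig::default()
//!         // Enable flamegraph creation for the callgrind runs with the default settings
//!         .tool(Callgrind::default().flamegraph(FlamegraphConfig::default()));
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```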
//!
//! Callgrind flamegraphs show the inclusive costs for functions and a specific event type, much
//! like `callgrind_annotate` does, but in a nicer (and clickable) way. In particular, differential
//! flamegraphs facilitate a deeper understanding of code sections which cause a bottleneck or a
//! performance regression.
//!
//! The produced flamegraph SVG files are located next to the respective callgrind output file in
//! the `target/iai` directory.
//!
//! [Guide]: https://iai-callgrind.github.io/iai-callgrind/latest/html/intro.html

#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(test(attr(warn(unused))))]
#![doc(test(attr(allow(unused_extern_crates))))]

#[cfg(feature = "default")]
#[doc(hidden)]
pub mod __internal;
#[cfg(feature = "default")]
mod bin_bench;
#[cfg(feature = "client_requests_defs")]
pub mod client_requests;
#[cfg(feature = "default")]
mod common;
#[cfg(feature = "default")]
mod lib_bench;
#[cfg(feature = "default")]
mod macros;
#[cfg(feature = "default")]
pub use bin_bench::{
    Bench, BenchmarkId, BinaryBenchmark, BinaryBenchmarkConfig, BinaryBenchmarkGroup, Command,
    Delay, ExitWith, Sandbox,
};
#[cfg(feature = "default")]
pub use bincode;
#[cfg(feature = "default")]
pub use common::{
    Bbv, Cachegrind, Callgrind, Dhat, Drd, FlamegraphConfig, Helgrind, Massif, Memcheck,
    OutputFormat,
};
#[cfg(feature = "client_requests_defs")]
pub use cty;
#[cfg(feature = "default")]
pub use iai_callgrind_macros::{binary_benchmark, library_benchmark};
// Only add enums here. Do not re-export structs from the runner api directly. See the
// documentation in `__internal::mod` for more details.
#[cfg(feature = "default")]
pub use iai_callgrind_runner::api::{
    CachegrindMetric, CachegrindMetrics, CallgrindMetrics, DelayKind, DhatMetric, DhatMetrics,
    Direction, EntryPoint, ErrorMetric, EventKind, FlamegraphKind, Limit, Pipe, Stdin, Stdio,
    ValgrindTool,
};
#[cfg(feature = "default")]
pub use lib_bench::LibraryBenchmarkConfig;