// par_bench/lib.rs
1//! Multi-threaded benchmark execution framework for performance testing.
2//!
3//! This package provides utilities to execute multi-threaded benchmarks with precise control
4//! over thread groups, state management, and measurement timing. It is designed to integrate
5//! with benchmarking frameworks like Criterion while handling the complexities of coordinated
6//! multi-threaded execution.
7//!
8//! The core functionality includes:
9//! - [`Run`] - Configurable multi-threaded benchmark execution with builder pattern API
10//! - [`ThreadPool`] - Pre-warmed thread pool to eliminate thread creation overhead in benchmarks
11//! - [`RunMeta`] - Metadata about the benchmark run, including group information and iteration counts
12//! - [`RunSummary`] - Results from benchmark execution, including timing and measurement data
13//!
14//! This package is not meant for use in production, serving only as a development tool for
15//! benchmarking and performance analysis.
16//!
17//! # Operating Principles
18//!
19//! ## Thread Groups
20//!
21//! Benchmarks can divide threads into equal-sized groups, allowing for scenarios where different
22//! groups perform different roles (e.g., readers vs writers, producers vs consumers). Each thread
23//! receives metadata about which group it belongs to and can behave differently based on this.
24//!
25//! ## State Management
26//!
27//! The framework supports multiple levels of state:
28//! - **Thread State**: Created once per thread, shared across all iterations
29//! - **Iteration State**: Created for each iteration, allowing per-iteration setup
30//! - **Cleanup State**: Returned by iteration functions, dropped after measurement
31//!
32//! ## Measurement Timing
33//!
34//! Measurement wrappers allow precise control over what gets measured. The framework separates
35//! preparation (unmeasured) from execution (measured) phases, ensuring benchmarks capture only
36//! the intended work.
37//!
38//! # Basic Example
39//!
40//! ```
41//! use std::sync::Arc;
42//! use std::sync::atomic::{AtomicU64, Ordering};
43//!
44//! use many_cpus::ProcessorSet;
45//! use par_bench::{Run, ThreadPool};
46//!
47//! # fn main() {
48//! // Create a thread pool with default processor set
49//! let mut pool = ThreadPool::new(&ProcessorSet::default());
50//!
51//! // Shared counter for all threads to increment
52//! let counter = Arc::new(AtomicU64::new(0));
53//!
//! let run = Run::new()
//!     .prepare_thread({
//!         let counter = Arc::clone(&counter);
//!         move |_| Arc::clone(&counter)
//!     })
//!     .prepare_iter(|args| Arc::clone(args.thread_state()))
//!     .iter(|mut args| {
//!         // This is the measured work
//!         args.iter_state().fetch_add(1, Ordering::Relaxed);
//!     });
64//!
65//! // Execute 1000 iterations across all threads
66//! let results = run.execute_on(&mut pool, 1000);
67//! println!("Mean duration: {:?}", results.mean_duration());
68//! # }
69//! ```
70//!
71//! # Multi-Group Example
72//!
73//! ```
74//! use std::sync::Arc;
75//! use std::sync::atomic::{AtomicU64, Ordering};
76//!
77//! use many_cpus::ProcessorSet;
78//! use new_zealand::nz;
79//! use par_bench::{Run, ThreadPool};
80//!
81//! # fn main() {
82//! # if let Some(processors) = ProcessorSet::builder().take(nz!(4)) {
83//! let mut pool = ThreadPool::new(&processors);
84//!
85//! let reader_count = Arc::new(AtomicU64::new(0));
86//! let writer_count = Arc::new(AtomicU64::new(0));
87//!
//! let run = Run::new()
//!     .groups(nz!(2)) // Divide 4 threads into 2 groups of 2 threads each
//!     .prepare_thread({
//!         let reader_count = Arc::clone(&reader_count);
//!         let writer_count = Arc::clone(&writer_count);
//!         move |args| {
//!             if args.meta().group_index() == 0 {
//!                 ("reader", Arc::clone(&reader_count))
//!             } else {
//!                 ("writer", Arc::clone(&writer_count))
//!             }
//!         }
//!     })
//!     .prepare_iter(|args| args.thread_state().clone())
//!     .iter(|mut args| {
//!         let (role, counter) = args.take_iter_state();
//!         match role {
//!             "reader" => {
//!                 // Reader work
//!                 counter.fetch_add(1, Ordering::Relaxed);
//!             }
//!             "writer" => {
//!                 // Writer work
//!                 counter.fetch_add(10, Ordering::Relaxed);
//!             }
//!             _ => unreachable!(),
//!         }
//!     });
116//!
117//! let results = run.execute_on(&mut pool, 100);
118//! println!("Results: {:?}", results.mean_duration());
119//! # }
120//! # }
121//! ```
122//!
123//! # Resource Usage Tracking
124//!
125//! When either the `alloc_tracker` or `all_the_time` features are enabled, the [`ResourceUsageExt`]
126//! extension trait becomes available, providing convenient resource usage tracking for benchmarks:
127//!
128//! ```ignore
129//! use alloc_tracker::{Allocator, Session as AllocSession};
130//! use all_the_time::Session as TimeSession;
131//! use par_bench::{ResourceUsageExt, Run, ThreadPool};
132//!
133//! #[global_allocator]
134//! static ALLOCATOR: Allocator<std::alloc::System> = Allocator::system();
135//!
136//! let allocs = AllocSession::new();
137//! let processor_time = TimeSession::new();
138//! let mut pool = ThreadPool::new(&ProcessorSet::default());
139//!
//! let results = Run::new()
//!     .measure_resource_usage(|measure| {
//!         measure
//!             .allocs(&allocs, "my_operation")
//!             .processor_time(&processor_time, "my_operation")
//!     })
//!     .iter(|_| {
//!         let _data = vec![1, 2, 3, 4, 5]; // This allocates memory
//!
//!         // Perform processor-intensive work
//!         let mut sum = 0;
//!         for i in 0..1000 {
//!             sum += i * i;
//!         }
//!         std::hint::black_box(sum);
//!     })
//!     .execute_on(&mut pool, 1000);
//!
//! // Access the combined resource usage data
//! for output in results.measure_outputs() {
//!     if let Some(alloc_report) = output.allocs() {
//!         println!("Allocation data available");
//!     }
//!     if let Some(time_report) = output.processor_time() {
//!         println!("Processor time data available");
//!     }
//! }
167//! ```
168//!
169//! You can also use just one type of measurement:
170//!
171//! ```ignore
172//! // Just allocation tracking
//! let results = Run::new()
//!     .measure_resource_usage(|measure| {
//!         measure.allocs(&allocs, "alloc_only")
//!     })
//!     .iter(|_| { /* work */ })
//!     .execute_on(&mut pool, 1000);
//!
//! // Just processor time tracking
//! let results = Run::new()
//!     .measure_resource_usage(|measure| {
//!         measure.processor_time(&processor_time, "time_only")
//!     })
//!     .iter(|_| { /* work */ })
//!     .execute_on(&mut pool, 1000);
187//! ```
188
// Private implementation modules. Their public items are re-exported from the
// crate root below, so users can write e.g. `par_bench::Run` directly.
mod run;
mod run_configured;
mod run_configured_criterion;
mod run_meta;
mod threadpool;

// These are in a separate module because 99% of the time the user never needs to name
// these types, so it makes sense to de-emphasize them in the API documentation.
pub mod args;
pub mod configure;

// Resource usage tracking is optional: this module (and the `ResourceUsageExt`
// trait documented above) only exists when at least one tracking backend
// feature is enabled.
#[cfg(any(feature = "alloc_tracker", feature = "all_the_time"))]
mod resource_usage_ext;

#[cfg(any(feature = "alloc_tracker", feature = "all_the_time"))]
pub use resource_usage_ext::*;
pub use run::*;
pub use run_configured::*;
pub use run_meta::*;
pub use threadpool::*;
// NOTE(review): `run_configured_criterion` is intentionally not re-exported —
// presumably its Criterion integration is surfaced through impls on types from
// `run_configured`; confirm against that module.