par_bench/
lib.rs

1//! Multi-threaded benchmark execution framework for performance testing.
2//!
3//! This package provides utilities to execute multi-threaded benchmarks with precise control
4//! over thread groups, state management, and measurement timing. It is designed to integrate
5//! with benchmarking frameworks like Criterion while handling the complexities of coordinated
6//! multi-threaded execution.
7//!
8//! The core functionality includes:
9//! - [`Run`] - Configurable multi-threaded benchmark execution with builder pattern API
10//! - [`ThreadPool`] - Pre-warmed thread pool to eliminate thread creation overhead in benchmarks
11//! - [`RunMeta`] - Metadata about the benchmark run, including group information and iteration counts
12//! - [`RunSummary`] - Results from benchmark execution, including timing and measurement data
13//!
14//! This package is not meant for use in production, serving only as a development tool for
15//! benchmarking and performance analysis.
16//!
17//! # Operating Principles
18//!
19//! ## Thread Groups
20//!
21//! Benchmarks can divide threads into equal-sized groups, allowing for scenarios where different
22//! groups perform different roles (e.g., readers vs writers, producers vs consumers). Each thread
23//! receives metadata about which group it belongs to and can behave differently based on this.
24//!
25//! ## State Management
26//!
27//! The framework supports multiple levels of state:
28//! - **Thread State**: Created once per thread, shared across all iterations
29//! - **Iteration State**: Created for each iteration, allowing per-iteration setup
30//! - **Cleanup State**: Returned by iteration functions, dropped after measurement
31//!
32//! ## Measurement Timing
33//!
34//! Measurement wrappers allow precise control over what gets measured. The framework separates
35//! preparation (unmeasured) from execution (measured) phases, ensuring benchmarks capture only
36//! the intended work.
37//!
38//! # Basic Example
39//!
40//! ```
41//! use std::sync::Arc;
42//! use std::sync::atomic::{AtomicU64, Ordering};
43//!
44//! use many_cpus::ProcessorSet;
45//! use par_bench::{Run, ThreadPool};
46//!
47//! # fn main() {
48//! // Create a thread pool with default processor set
49//! let mut pool = ThreadPool::new(&ProcessorSet::default());
50//!
51//! // Shared counter for all threads to increment
52//! let counter = Arc::new(AtomicU64::new(0));
53//!
54//! let run = Run::new()
55//!     .prepare_thread({
56//!         let counter = Arc::clone(&counter);
57//!         move |_| Arc::clone(&counter)
58//!     })
59//!     .prepare_iter(|args| Arc::clone(args.thread_state()))
60//!     .iter(|mut args| {
61//!         // This is the measured work
62//!         args.iter_state().fetch_add(1, Ordering::Relaxed);
63//!     });
64//!
65//! // Execute 1000 iterations across all threads
66//! let results = run.execute_on(&mut pool, 1000);
67//! println!("Mean duration: {:?}", results.mean_duration());
68//! # }
69//! ```
70//!
71//! # Multi-Group Example
72//!
73//! ```
74//! use std::sync::Arc;
75//! use std::sync::atomic::{AtomicU64, Ordering};
76//!
77//! use many_cpus::ProcessorSet;
78//! use new_zealand::nz;
79//! use par_bench::{Run, ThreadPool};
80//!
81//! # fn main() {
82//! # if let Some(processors) = ProcessorSet::builder().take(nz!(4)) {
83//! let mut pool = ThreadPool::new(&processors);
84//!
85//! let reader_count = Arc::new(AtomicU64::new(0));
86//! let writer_count = Arc::new(AtomicU64::new(0));
87//!
88//! let run = Run::new()
89//!     .groups(nz!(2)) // Divide 4 threads into 2 groups of 2 threads each
90//!     .prepare_thread({
91//!         let reader_count = Arc::clone(&reader_count);
92//!         let writer_count = Arc::clone(&writer_count);
93//!         move |args| {
94//!             if args.meta().group_index() == 0 {
95//!                 ("reader", Arc::clone(&reader_count))
96//!             } else {
97//!                 ("writer", Arc::clone(&writer_count))
98//!             }
99//!         }
100//!     })
101//!     .prepare_iter(|args| args.thread_state().clone())
102//!     .iter(|mut args| {
103//!         let (role, counter) = args.take_iter_state();
104//!         match role {
105//!             "reader" => {
106//!                 // Reader work
107//!                 counter.fetch_add(1, Ordering::Relaxed);
108//!             }
109//!             "writer" => {
110//!                 // Writer work
111//!                 counter.fetch_add(10, Ordering::Relaxed);
112//!             }
113//!             _ => unreachable!(),
114//!         }
115//!     });
116//!
117//! let results = run.execute_on(&mut pool, 100);
118//! println!("Results: {:?}", results.mean_duration());
119//! # }
120//! # }
121//! ```
122//!
123//! # Resource Usage Tracking
124//!
125//! When either the `alloc_tracker` or `all_the_time` features are enabled, the [`ResourceUsageExt`]
126//! extension trait becomes available, providing convenient resource usage tracking for benchmarks:
127//!
128//! ```ignore
129//! use alloc_tracker::{Allocator, Session as AllocSession};
130//! use all_the_time::Session as TimeSession;
131//! use par_bench::{ResourceUsageExt, Run, ThreadPool};
132//!
133//! #[global_allocator]
134//! static ALLOCATOR: Allocator<std::alloc::System> = Allocator::system();
135//!
136//! let allocs = AllocSession::new();
137//! let processor_time = TimeSession::new();
138//! let mut pool = ThreadPool::new(&ProcessorSet::default());
139//!
140//! let results = Run::new()
141//!     .measure_resource_usage(|measure| {
142//!         measure
143//!             .allocs(&allocs, "my_operation")
144//!             .processor_time(&processor_time, "my_operation")
145//!     })
146//!     .iter(|_| {
147//!         let _data = vec![1, 2, 3, 4, 5]; // This allocates memory
//!
149//!         // Perform processor-intensive work
150//!         let mut sum = 0;
151//!         for i in 0..1000 {
152//!             sum += i * i;
153//!         }
154//!         std::hint::black_box(sum);
155//!     })
156//!     .execute_on(&mut pool, 1000);
157//!
158//! // Access the combined resource usage data
159//! for output in results.measure_outputs() {
160//!     if let Some(alloc_report) = output.allocs() {
161//!         println!("Allocation data available");
162//!     }
163//!     if let Some(time_report) = output.processor_time() {
164//!         println!("Processor time data available");
165//!     }
166//! }
167//! ```
168//!
169//! You can also use just one type of measurement:
170//!
171//! ```ignore
172//! // Just allocation tracking
173//! let results = Run::new()
174//!     .measure_resource_usage(|measure| {
175//!         measure.allocs(&allocs, "alloc_only")
176//!     })
177//!     .iter(|_| { /* work */ })
178//!     .execute_on(&mut pool, 1000);
179//!
180//! // Just processor time tracking
181//! let results = Run::new()
182//!     .measure_resource_usage(|measure| {
183//!         measure.processor_time(&processor_time, "time_only")
184//!     })
185//!     .iter(|_| { /* work */ })
186//!     .execute_on(&mut pool, 1000);
187//! ```
188
// Private implementation modules. Their public items are re-exported below so
// users see a flat `par_bench::*` namespace rather than the internal layout.
mod run;
mod run_configured;
mod run_configured_criterion;
mod run_meta;
mod threadpool;

// These are in a separate module because 99% of the time the user never needs to name
// these types, so it makes sense to de-emphasize them in the API documentation.
pub mod args;
pub mod configure;

// Resource usage tracking is optional: the extension trait only exists when at
// least one of the backing feature crates is compiled in.
#[cfg(any(feature = "alloc_tracker", feature = "all_the_time"))]
mod resource_usage_ext;

// Same feature gate as the module declaration above, so the re-export and the
// module always appear (or disappear) together.
#[cfg(any(feature = "alloc_tracker", feature = "all_the_time"))]
pub use resource_usage_ext::*;
// Flatten the core modules into the crate root.
// NOTE(review): `run_configured_criterion` is intentionally not re-exported here —
// presumably it contributes only trait impls / Criterion integration for types
// defined in the other modules; confirm it declares no standalone public items.
pub use run::*;
pub use run_configured::*;
pub use run_meta::*;
pub use threadpool::*;