loom_rs/lib.rs
//! # loom-rs
//!
//! **Weaving multiple threads together**
//!
//! A bespoke thread pool runtime combining tokio and rayon with CPU pinning capabilities.
//!
//! ## Features
//!
//! - **Hybrid Runtime**: Combines tokio for async I/O with rayon for CPU-bound parallel work
//! - **CPU Pinning**: Automatically pins threads to specific CPUs for consistent performance
//! - **Zero Allocation**: `spawn_compute()` uses per-type pools for zero allocation after warmup
//! - **Scoped Compute**: `scope_compute()` allows borrowing local data for parallel work
//! - **Flexible Configuration**: Configure via files (TOML/YAML/JSON), environment variables, or code
//! - **CLI Integration**: Built-in clap support for command-line overrides
//! - **CUDA NUMA Awareness**: Optional feature for selecting CPUs local to a GPU (Linux only)
//!
//! ## Quick Start
//!
//! ```ignore
//! use loom_rs::LoomBuilder;
//!
//! fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let runtime = LoomBuilder::new()
//!         .prefix("myapp")
//!         .tokio_threads(2)
//!         .rayon_threads(6)
//!         .build()?;
//!
//!     runtime.block_on(async {
//!         // Spawn tracked async I/O task
//!         let io_handle = runtime.spawn_async(async {
//!             // Async I/O work
//!             42
//!         });
//!
//!         // Spawn tracked compute task and await result (zero alloc after warmup)
//!         let result = runtime.spawn_compute(|| {
//!             // CPU-bound work on rayon
//!             (0..1_000_000).sum::<i64>()
//!         }).await;
//!         println!("Compute result: {}", result);
//!
//!         // Zero-overhead parallel iterators
//!         let _sum = runtime.install(|| {
//!             use rayon::prelude::*;
//!             (0..1000).into_par_iter().sum::<i64>()
//!         });
//!
//!         // Wait for async task
//!         let io_result = io_handle.await.unwrap();
//!         println!("I/O result: {}", io_result);
//!     });
//!
//!     // Graceful shutdown
//!     runtime.block_until_idle();
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Ergonomic Access
//!
//! Use `current_runtime()` or `spawn_compute()` from anywhere in the runtime:
//!
//! ```ignore
//! use loom_rs::LoomBuilder;
//!
//! let runtime = LoomBuilder::new().build()?;
//!
//! runtime.block_on(async {
//!     // No need to pass &runtime around
//!     let result = loom_rs::spawn_compute(|| expensive_work()).await;
//!
//!     // Or get the runtime explicitly
//!     let rt = loom_rs::current_runtime().unwrap();
//!     rt.spawn_async(async { /* ... */ });
//! });
//! ```
//!
//! ## Configuration
//!
//! Configuration sources are merged in order (later sources override earlier ones); a combined sketch follows the list:
//!
//! 1. Default values
//! 2. Config files (via `.file()`)
//! 3. Environment variables (via `.env_prefix()`)
//! 4. Programmatic overrides
//! 5. CLI arguments (via `.with_cli_args()`)
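//!
//! For illustration, a minimal sketch that layers several of these sources (CLI parsing omitted; only builder methods documented elsewhere in this file are used):
//!
//! ```ignore
//! use loom_rs::LoomBuilder;
//!
//! let runtime = LoomBuilder::new()
//!     .file("config.toml")   // 2. config file
//!     .env_prefix("LOOM")    // 3. environment variables (LOOM_*)
//!     .rayon_threads(6)      // 4. programmatic override wins over file/env
//!     .build()?;
//! ```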
//!
//! ### Config File Example (TOML)
//!
//! ```toml
//! prefix = "myapp"
//! cpuset = "0-7,16-23"
//! tokio_threads = 2
//! rayon_threads = 14
//! compute_pool_size = 64
//! ```
//!
//! ### Environment Variables
//!
//! With `.env_prefix("LOOM")`:
//! - `LOOM_PREFIX=myapp`
//! - `LOOM_CPUSET=0-7`
//! - `LOOM_TOKIO_THREADS=2`
//! - `LOOM_RAYON_THREADS=6`
//!
//! ### CLI Arguments
//!
//! ```ignore
//! use clap::Parser;
//! use loom_rs::{LoomBuilder, LoomArgs};
//!
//! #[derive(Parser)]
//! struct MyArgs {
//!     #[command(flatten)]
//!     loom: LoomArgs,
//! }
//!
//! let args = MyArgs::parse();
//! let runtime = LoomBuilder::new()
//!     .file("config.toml")
//!     .env_prefix("LOOM")
//!     .with_cli_args(&args.loom)
//!     .build()?;
//! ```
//!
//! ## CPU Set Format
//!
//! The `cpuset` option accepts a string in Linux taskset/numactl format (see the sketch after this list):
//! - Single CPUs: `"0"`, `"5"`
//! - Ranges: `"0-7"`, `"16-23"`
//! - Mixed: `"0-3,8-11"`, `"0,2,4,6-8"`
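//!
//! A minimal sketch using the environment variable route described above (the same strings are valid for the `cpuset` key in config files):
//!
//! ```ignore
//! // Started as: LOOM_CPUSET="0-3,8-11" ./my-binary
//! // pins the runtime's threads to cores 0-3 and 8-11.
//! let runtime = LoomBuilder::new()
//!     .env_prefix("LOOM")
//!     .build()?;
//! ```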
//!
//! ## CUDA Support
//!
//! With the `cuda` feature enabled (Linux only), you can configure the runtime
//! to use CPUs local to a specific CUDA GPU:
//!
//! ```ignore
//! let runtime = LoomBuilder::new()
//!     .cuda_device_id(0) // Use CPUs near GPU 0
//!     .build()?;
//! ```
//!
//! ## Thread Naming
//!
//! Threads are named with the configured prefix (see the sketch below):
//! - Tokio threads: `{prefix}-tokio-0000`, `{prefix}-tokio-0001`, ...
//! - Rayon threads: `{prefix}-rayon-0000`, `{prefix}-rayon-0001`, ...
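//!
//! As a quick sketch for observing this, read the current thread's name from inside a task (a standard library call; the exact name depends on your configured prefix):
//!
//! ```ignore
//! runtime.spawn_compute(|| {
//!     // Prints something like Some("myapp-rayon-0003") for prefix "myapp"
//!     println!("{:?}", std::thread::current().name());
//! }).await;
//! ```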

pub(crate) mod affinity;
pub(crate) mod bridge;
pub mod builder;
pub mod config;
pub(crate) mod context;
pub mod cpuset;
pub mod error;
pub mod mab;
pub mod metrics;
pub(crate) mod pool;
pub mod runtime;
pub mod stream;

#[cfg(feature = "cuda")]
pub mod cuda;

pub use builder::{LoomArgs, LoomBuilder};
pub use config::LoomConfig;
pub use context::current_runtime;
pub use error::{LoomError, Result};
pub use mab::{Arm, ComputeHint, ComputeHintProvider, MabKnobs, MabScheduler};
pub use metrics::LoomMetrics;
pub use runtime::LoomRuntime;
pub use stream::ComputeStreamExt;

// Re-export rayon::Scope for ergonomic use with scope_compute
pub use rayon::Scope;

/// Spawn compute work using the current runtime.
///
/// This is a convenience function for `loom_rs::current_runtime().unwrap().spawn_compute(f)`.
/// It allows spawning compute work from anywhere within a loom runtime without
/// explicitly passing the runtime reference.
///
/// # Panics
///
/// Panics if called outside a loom runtime context (i.e., not within `block_on`,
/// a tokio worker thread, or a rayon worker thread managed by the runtime).
///
/// # Performance
///
/// Same as `LoomRuntime::spawn_compute()`:
/// - Zero bytes allocated after warmup (pool hit)
/// - ~100-500 ns overhead
///
/// # Example
///
/// ```ignore
/// use loom_rs::LoomBuilder;
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     // No need to pass &runtime around
///     let result = loom_rs::spawn_compute(|| {
///         expensive_work()
///     }).await;
/// });
/// ```
pub async fn spawn_compute<F, R>(f: F) -> R
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime()
        .expect("spawn_compute called outside loom runtime")
        .spawn_compute(f)
        .await
}

/// Try to spawn compute work using the current runtime.
///
/// Like `spawn_compute()`, but returns `None` if not in a runtime context
/// instead of panicking.
///
/// # Example
///
/// ```ignore
/// if let Some(future) = loom_rs::try_spawn_compute(|| work()) {
///     let result = future.await;
/// }
/// ```
pub fn try_spawn_compute<F, R>(f: F) -> Option<impl std::future::Future<Output = R>>
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime().map(|rt| async move { rt.spawn_compute(f).await })
}

/// Spawn adaptive work using the current runtime.
///
/// This is a convenience function for `loom_rs::current_runtime().unwrap().spawn_adaptive(f)`.
/// Uses MAB (Multi-Armed Bandit) scheduling to learn whether to inline or offload work.
///
/// # Panics
///
/// Panics if called outside a loom runtime context.
///
/// # Example
///
/// ```ignore
/// use loom_rs::LoomBuilder;
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     // MAB adaptively decides inline vs offload
///     let result = loom_rs::spawn_adaptive(|| {
///         process_work()
///     }).await;
/// });
/// ```
pub async fn spawn_adaptive<F, R>(f: F) -> R
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime()
        .expect("spawn_adaptive called outside loom runtime")
        .spawn_adaptive(f)
        .await
}

/// Spawn adaptive work with a hint using the current runtime.
///
/// Like `spawn_adaptive()`, but provides a hint to guide cold-start behavior.
///
/// # Panics
///
/// Panics if called outside a loom runtime context.
///
/// # Example
///
/// ```ignore
/// use loom_rs::{LoomBuilder, ComputeHint};
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     // Hint that this is likely expensive work
///     let result = loom_rs::spawn_adaptive_with_hint(
///         ComputeHint::High,
///         || expensive_work()
///     ).await;
/// });
/// ```
pub async fn spawn_adaptive_with_hint<F, R>(hint: ComputeHint, f: F) -> R
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime()
        .expect("spawn_adaptive_with_hint called outside loom runtime")
        .spawn_adaptive_with_hint(hint, f)
        .await
}

/// Execute a scoped parallel computation using the current runtime.
///
/// This is a convenience function for `loom_rs::current_runtime().unwrap().scope_compute(f)`.
/// It allows borrowing local variables from the async context for use in parallel work.
///
/// # Panics
///
/// Panics if called outside a loom runtime context (i.e., not within `block_on`,
/// a tokio worker thread, or a rayon worker thread managed by the runtime).
///
/// # Performance
///
/// | Aspect     | Value                           |
/// |------------|---------------------------------|
/// | Allocation | ~96 bytes per call (not pooled) |
/// | Overhead   | Comparable to `spawn_compute()` |
///
/// # Cancellation Safety
///
/// If the future is dropped before completion (e.g., via `select!` or timeout),
/// the drop will **block** until the rayon scope finishes. This is necessary
/// to prevent use-after-free of borrowed data.
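///
/// As a sketch of what this means in practice (using `tokio::select!` and
/// `tokio::time::sleep` for the race; the timeout value is purely illustrative):
///
/// ```ignore
/// let data = vec![1u64; 1_000_000];
///
/// tokio::select! {
///     _ = loom_rs::scope_compute(|s| {
///         s.spawn(|_| { let _ = data.iter().sum::<u64>(); });
///     }) => { /* scope finished first */ }
///     _ = tokio::time::sleep(std::time::Duration::from_micros(10)) => {
///         // If the timeout wins, dropping the scope_compute future blocks
///         // right here until the rayon scope is done touching `data`.
///     }
/// }
///
/// // Either way, `data` is no longer referenced by worker threads.
/// ```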
///
/// # Example
///
/// ```ignore
/// use loom_rs::LoomBuilder;
/// use std::sync::atomic::{AtomicI32, Ordering};
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     let data = vec![1, 2, 3, 4, 5, 6, 7, 8];
///     let sum = AtomicI32::new(0);
///
///     // Borrow `data` and `sum` for parallel processing - no need to pass &runtime
///     loom_rs::scope_compute(|s| {
///         let (left, right) = data.split_at(data.len() / 2);
///         let sum_ref = &sum;
///
///         s.spawn(move |_| {
///             sum_ref.fetch_add(left.iter().sum::<i32>(), Ordering::Relaxed);
///         });
///         s.spawn(move |_| {
///             sum_ref.fetch_add(right.iter().sum::<i32>(), Ordering::Relaxed);
///         });
///     }).await;
///
///     // `data` and `sum` are still valid here
///     println!("Sum of {:?} = {}", data, sum.load(Ordering::Relaxed));
/// });
/// ```
pub async fn scope_compute<'env, F, R>(f: F) -> R
where
    F: FnOnce(&Scope<'env>) -> R + Send + 'env,
    R: Send + 'env,
{
    current_runtime()
        .expect("scope_compute called outside loom runtime")
        .scope_compute(f)
        .await
}