loom_rs/lib.rs
//! # loom-rs
//!
//! **Weaving multiple threads together**
//!
//! A bespoke thread pool runtime combining tokio and rayon with CPU pinning capabilities.
//!
//! ## Features
//!
//! - **Hybrid Runtime**: Combines tokio for async I/O with rayon for CPU-bound parallel work
//! - **CPU Pinning**: Automatically pins threads to specific CPUs for consistent performance
//! - **Zero Allocation**: `spawn_compute()` uses per-type pools for zero allocation after warmup
//! - **Scoped Compute**: `scope_compute()` allows borrowing local data for parallel work
//! - **Flexible Configuration**: Configure via files (TOML/YAML/JSON), environment variables, or code
//! - **CLI Integration**: Built-in clap support for command-line overrides
//! - **CUDA NUMA Awareness**: Optional feature for selecting CPUs local to a GPU (Linux only)
//!
//! ## Quick Start
//!
//! ```ignore
//! use loom_rs::LoomBuilder;
//!
//! fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let runtime = LoomBuilder::new()
//!         .prefix("myapp")
//!         .tokio_threads(2)
//!         .rayon_threads(6)
//!         .build()?;
//!
//!     runtime.block_on(async {
//!         // Spawn tracked async I/O task
//!         let io_handle = runtime.spawn_async(async {
//!             // Async I/O work
//!             42
//!         });
//!
//!         // Spawn tracked compute task and await result (zero alloc after warmup)
//!         let result = runtime.spawn_compute(|| {
//!             // CPU-bound work on rayon
//!             (0..1_000_000).sum::<i64>()
//!         }).await;
//!         println!("Compute result: {}", result);
//!
//!         // Zero-overhead parallel iterators
//!         let _sum = runtime.install(|| {
//!             use rayon::prelude::*;
//!             (0..1000).into_par_iter().sum::<i64>()
//!         });
//!
//!         // Wait for async task
//!         let io_result = io_handle.await.unwrap();
//!         println!("I/O result: {}", io_result);
//!     });
//!
//!     // Graceful shutdown
//!     runtime.block_until_idle();
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Ergonomic Access
//!
//! Use `current_runtime()` or `spawn_compute()` from anywhere in the runtime:
//!
//! ```ignore
//! use loom_rs::LoomBuilder;
//!
//! let runtime = LoomBuilder::new().build()?;
//!
//! runtime.block_on(async {
//!     // No need to pass &runtime around
//!     let result = loom_rs::spawn_compute(|| expensive_work()).await;
//!
//!     // Or get the runtime explicitly
//!     let rt = loom_rs::current_runtime().unwrap();
//!     rt.spawn_async(async { /* ... */ });
//! });
//! ```
//!
//! ## Configuration
//!
//! Configuration sources are merged in order (later sources override earlier ones); a combined sketch follows the list:
//!
//! 1. Default values
//! 2. Config files (via `.file()`)
//! 3. Environment variables (via `.env_prefix()`)
//! 4. Programmatic overrides
//! 5. CLI arguments (via `.with_cli_args()`)
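//!
//! For illustration, a minimal sketch that layers several of these sources (CLI parsing omitted; only builder methods documented elsewhere in this file are used):
//!
//! ```ignore
//! use loom_rs::LoomBuilder;
//!
//! let runtime = LoomBuilder::new()
//!     .file("config.toml")   // 2. config file
//!     .env_prefix("LOOM")    // 3. environment variables (LOOM_*)
//!     .rayon_threads(6)      // 4. programmatic override wins over file/env
//!     .build()?;
//! ```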
//!
//! ### Config File Example (TOML)
//!
//! ```toml
//! prefix = "myapp"
//! cpuset = "0-7,16-23"
//! tokio_threads = 2
//! rayon_threads = 14
//! compute_pool_size = 64
//! ```
//!
//! ### Environment Variables
//!
//! With `.env_prefix("LOOM")`:
//! - `LOOM_PREFIX=myapp`
//! - `LOOM_CPUSET=0-7`
//! - `LOOM_TOKIO_THREADS=2`
//! - `LOOM_RAYON_THREADS=6`
//!
//! ### CLI Arguments
//!
//! ```ignore
//! use clap::Parser;
//! use loom_rs::{LoomBuilder, LoomArgs};
//!
//! #[derive(Parser)]
//! struct MyArgs {
//!     #[command(flatten)]
//!     loom: LoomArgs,
//! }
//!
//! let args = MyArgs::parse();
//! let runtime = LoomBuilder::new()
//!     .file("config.toml")
//!     .env_prefix("LOOM")
//!     .with_cli_args(&args.loom)
//!     .build()?;
//! ```
//!
//! ## CPU Set Format
//!
//! The `cpuset` option accepts a string in Linux taskset/numactl format (see the sketch after this list):
//! - Single CPUs: `"0"`, `"5"`
//! - Ranges: `"0-7"`, `"16-23"`
//! - Mixed: `"0-3,8-11"`, `"0,2,4,6-8"`
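//!
//! A minimal sketch using the environment variable route described above (the same strings are valid for the `cpuset` key in config files):
//!
//! ```ignore
//! // Started as: LOOM_CPUSET="0-3,8-11" ./my-binary
//! // pins the runtime's threads to cores 0-3 and 8-11.
//! let runtime = LoomBuilder::new()
//!     .env_prefix("LOOM")
//!     .build()?;
//! ```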
//!
//! ## CUDA Support
//!
//! With the `cuda` feature enabled (Linux only), you can configure the runtime
//! to use CPUs local to a specific CUDA GPU:
//!
//! ```ignore
//! let runtime = LoomBuilder::new()
//!     .cuda_device_id(0) // Use CPUs near GPU 0
//!     .build()?;
//! ```
//!
//! ## Thread Naming
//!
//! Threads are named with the configured prefix (see the sketch below):
//! - Tokio threads: `{prefix}-tokio-0000`, `{prefix}-tokio-0001`, ...
//! - Rayon threads: `{prefix}-rayon-0000`, `{prefix}-rayon-0001`, ...
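//!
//! As a quick sketch for observing this, read the current thread's name from inside a task (a standard library call; the exact name depends on your configured prefix):
//!
//! ```ignore
//! runtime.spawn_compute(|| {
//!     // Prints something like Some("myapp-rayon-0003") for prefix "myapp"
//!     println!("{:?}", std::thread::current().name());
//! }).await;
//! ```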

pub(crate) mod affinity;
pub(crate) mod bridge;
pub mod builder;
pub mod config;
pub(crate) mod context;
pub mod cpuset;
pub mod error;
pub mod mab;
pub mod metrics;
pub(crate) mod pool;
pub mod runtime;
pub mod stream;

#[cfg(feature = "cuda")]
pub mod cuda;

pub use builder::{LoomArgs, LoomBuilder};
pub use config::LoomConfig;
pub use context::current_runtime;
pub use error::{LoomError, Result};
pub use mab::{Arm, ComputeHint, ComputeHintProvider, MabKnobs, MabScheduler};
pub use metrics::LoomMetrics;
pub use runtime::LoomRuntime;
pub use stream::ComputeStreamExt;

// Re-export rayon::Scope for ergonomic use with scope_compute
pub use rayon::Scope;

/// Spawn compute work using the current runtime.
///
/// This is a convenience function for `loom_rs::current_runtime().unwrap().spawn_compute(f)`.
/// It allows spawning compute work from anywhere within a loom runtime without
/// explicitly passing the runtime reference.
///
/// # Panics
///
/// Panics if called outside a loom runtime context (i.e., not within `block_on`,
/// a tokio worker thread, or a rayon worker thread managed by the runtime).
///
/// # Performance
///
/// Same as `LoomRuntime::spawn_compute()`:
/// - Zero bytes allocated after warmup (pool hit)
/// - ~100-500 ns overhead
///
/// # Example
///
/// ```ignore
/// use loom_rs::LoomBuilder;
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     // No need to pass &runtime around
///     let result = loom_rs::spawn_compute(|| {
///         expensive_work()
///     }).await;
/// });
/// ```
pub async fn spawn_compute<F, R>(f: F) -> R
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime()
        .expect("spawn_compute called outside loom runtime")
        .spawn_compute(f)
        .await
}

/// Try to spawn compute work using the current runtime.
///
/// Like `spawn_compute()`, but returns `None` if not in a runtime context
/// instead of panicking.
///
/// # Example
///
/// ```ignore
/// if let Some(future) = loom_rs::try_spawn_compute(|| work()) {
///     let result = future.await;
/// }
/// ```
pub fn try_spawn_compute<F, R>(f: F) -> Option<impl std::future::Future<Output = R>>
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime().map(|rt| async move { rt.spawn_compute(f).await })
}

/// Spawn adaptive work using the current runtime.
///
/// This is a convenience function for `loom_rs::current_runtime().unwrap().spawn_adaptive(f)`.
/// Uses MAB (Multi-Armed Bandit) scheduling to learn whether to inline or offload work.
///
/// # Panics
///
/// Panics if called outside a loom runtime context.
///
/// # Example
///
/// ```ignore
/// use loom_rs::LoomBuilder;
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     // MAB adaptively decides inline vs offload
///     let result = loom_rs::spawn_adaptive(|| {
///         process_work()
///     }).await;
/// });
/// ```
pub async fn spawn_adaptive<F, R>(f: F) -> R
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime()
        .expect("spawn_adaptive called outside loom runtime")
        .spawn_adaptive(f)
        .await
}

/// Spawn adaptive work with a hint using the current runtime.
///
/// Like `spawn_adaptive()`, but provides a hint to guide cold-start behavior.
///
/// # Panics
///
/// Panics if called outside a loom runtime context.
///
/// # Example
///
/// ```ignore
/// use loom_rs::{LoomBuilder, ComputeHint};
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     // Hint that this is likely expensive work
///     let result = loom_rs::spawn_adaptive_with_hint(
///         ComputeHint::High,
///         || expensive_work()
///     ).await;
/// });
/// ```
pub async fn spawn_adaptive_with_hint<F, R>(hint: ComputeHint, f: F) -> R
where
    F: FnOnce() -> R + Send + 'static,
    R: Send + 'static,
{
    current_runtime()
        .expect("spawn_adaptive_with_hint called outside loom runtime")
        .spawn_adaptive_with_hint(hint, f)
        .await
}

/// Execute a scoped parallel computation using the current runtime.
///
/// This is a convenience function for `loom_rs::current_runtime().unwrap().scope_compute(f)`.
/// It allows borrowing local variables from the async context for use in parallel work.
///
/// # Panics
///
/// Panics if called outside a loom runtime context (i.e., not within `block_on`,
/// a tokio worker thread, or a rayon worker thread managed by the runtime).
///
/// # Performance
///
/// | Aspect     | Value                           |
/// |------------|---------------------------------|
/// | Allocation | ~96 bytes per call (not pooled) |
/// | Overhead   | Comparable to `spawn_compute()` |
///
/// # Cancellation Safety
///
/// If the future is dropped before completion (e.g., via `select!` or timeout),
/// the drop will **block** until the rayon scope finishes. This is necessary
/// to prevent use-after-free of borrowed data.
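///
/// As a sketch of what this means in practice (using `tokio::select!` and
/// `tokio::time::sleep` for the race; the timeout value is purely illustrative):
///
/// ```ignore
/// let data = vec![1u64; 1_000_000];
///
/// tokio::select! {
///     _ = loom_rs::scope_compute(|s| {
///         s.spawn(|_| { let _ = data.iter().sum::<u64>(); });
///     }) => { /* scope finished first */ }
///     _ = tokio::time::sleep(std::time::Duration::from_micros(10)) => {
///         // If the timeout wins, dropping the scope_compute future blocks
///         // right here until the rayon scope is done touching `data`.
///     }
/// }
///
/// // Either way, `data` is no longer referenced by worker threads.
/// ```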
///
/// # Example
///
/// ```ignore
/// use loom_rs::LoomBuilder;
/// use std::sync::atomic::{AtomicI32, Ordering};
///
/// let runtime = LoomBuilder::new().build()?;
///
/// runtime.block_on(async {
///     let data = vec![1, 2, 3, 4, 5, 6, 7, 8];
///     let sum = AtomicI32::new(0);
///
///     // Borrow `data` and `sum` for parallel processing - no need to pass &runtime
///     loom_rs::scope_compute(|s| {
///         let (left, right) = data.split_at(data.len() / 2);
///         let sum_ref = &sum;
///
///         s.spawn(move |_| {
///             sum_ref.fetch_add(left.iter().sum::<i32>(), Ordering::Relaxed);
///         });
///         s.spawn(move |_| {
///             sum_ref.fetch_add(right.iter().sum::<i32>(), Ordering::Relaxed);
///         });
///     }).await;
///
///     // `data` and `sum` are still valid here
///     println!("Sum of {:?} = {}", data, sum.load(Ordering::Relaxed));
/// });
/// ```
pub async fn scope_compute<'env, F, R>(f: F) -> R
where
    F: FnOnce(&Scope<'env>) -> R + Send + 'env,
    R: Send + 'env,
{
    current_runtime()
        .expect("scope_compute called outside loom runtime")
        .scope_compute(f)
        .await
}