// ptab — src/lib.rs
1//! A lock-free, cache-line-aware concurrent table.
2//!
3//! `ptab` provides [`PTab`], a fixed-capacity table for storing entries and
4//! accessing them by opaque indices. It is optimized for read-heavy workloads
5//! where entries are looked up frequently from multiple threads simultaneously.
6//!
7//! # Overview
8//!
9//! The table assigns each inserted value a [`Detached`] index that uniquely
10//! identifies that entry. This index can be used to look up, access, or remove
11//! the entry. Indices include a generational component to mitigate the
12//! [ABA problem] in concurrent algorithms.
13//!
14//! # Usage
15//!
16//! ```
17//! use ptab::PTab;
18//!
19//! // Create a table with default capacity
20//! let table: PTab<String> = PTab::new();
21//!
22//! // Insert an entry and get its index
23//! let index = table.insert("hello".to_string()).unwrap();
24//!
25//! // Access the entry by index
26//! let greeting = table.with(index, |s| s.to_uppercase());
27//! assert_eq!(greeting, Some("HELLO".to_string()));
28//!
29//! // Remove the entry
30//! assert!(table.remove(index));
31//!
32//! // The entry is gone
33//! assert!(!table.exists(index));
34//! ```
35//!
36//! # Configuration
37//!
38//! Table capacity is configured at compile time through the [`Params`] trait.
39//! The default configuration ([`DefaultParams`]) provides [`Capacity::DEF`]
40//! slots:
41//!
42//! ```
43//! use ptab::{PTab, DefaultParams};
44//!
45//! // These are equivalent:
46//! let table1: PTab<u64> = PTab::new();
47//! let table2: PTab<u64, DefaultParams> = PTab::new();
48//! ```
49//!
50//! For custom capacities, use [`ConstParams`]:
51//!
52//! ```
53//! use ptab::{PTab, ConstParams};
54//!
55//! let table: PTab<u64, ConstParams<512>> = PTab::new();
56//! assert_eq!(table.capacity(), 512);
57//! ```
58//!
59//! Capacity is always rounded up to the nearest power of two and clamped
60//! to the range <code>[Capacity::MIN]..=[Capacity::MAX]</code>.
61//!
62//! # Concurrency
63//!
64//! All operations on [`PTab`] are thread-safe and lock-free. Multiple threads
65//! can concurrently insert, remove, and access entries without blocking.
66//!
67//! ```no_run
68//! use ptab::{PTab, ConstParams};
69//! use std::sync::Arc;
70//! use std::thread;
71//!
72//! let table: Arc<PTab<u64, ConstParams<1024>>> = Arc::new(PTab::new());
73//!
74//! let handles: Vec<_> = (0..4)
75//!   .map(|thread_id| {
76//!     let table = Arc::clone(&table);
77//!     thread::spawn(move || {
78//!       for i in 0..100 {
79//!         if let Some(idx) = table.insert(thread_id * 1000 + i) {
80//!           table.remove(idx);
81//!         }
82//!       }
83//!     })
84//!   })
85//!   .collect();
86//!
87//! for handle in handles {
88//!   handle.join().unwrap();
89//! }
90//! ```
91//!
92//! ## Memory Reclamation
93//!
94//! Removed entries are reclaimed using a pluggable memory management strategy.
95//! By default, epoch-based reclamation via [`sdd`] ensures safe concurrent
96//! access. In no-std environments, a leak-based fallback is available.
97//!
98//! See [`memory`] for reclamation trait details and custom implementations.
99//!
100//! # Memory Layout
101//!
102//! The table uses a cache-line-aware memory layout to minimize false sharing
103//! between threads. Consecutive allocations are distributed across different
104//! cache lines, reducing contention when multiple threads operate on
105//! recently-allocated entries. See [`CACHE_LINE_SLOTS`] for the distribution
106//! stride.
107//!
108//! # Capacity Limits
109//!
110//! Capacity is bounded by [`Capacity::MIN`] and [`Capacity::MAX`]. The default
111//! is [`Capacity::DEF`]. When full, [`PTab::insert()`] returns [`None`].
112//!
113//! [Capacity::MAX]: crate::params::Capacity::MAX
114//! [Capacity::MIN]: crate::params::Capacity::MIN
115//! [`CACHE_LINE_SLOTS`]: crate::params::CACHE_LINE_SLOTS
116//! [`Capacity::DEF`]: crate::params::Capacity::DEF
117//! [`Capacity::MAX`]: crate::params::Capacity::MAX
118//! [`Capacity::MIN`]: crate::params::Capacity::MIN
119//! [`ConstParams`]: crate::params::ConstParams
120//! [`DefaultParams`]: crate::params::DefaultParams
121//! [`Params`]: crate::params::Params
122//! [`PTab::insert()`]: crate::public::PTab::insert
123//! [`memory`]: crate::memory
124//!
125//! [ABA problem]: https://en.wikipedia.org/wiki/ABA_problem
126//! [`sdd`]: https://docs.rs/sdd
127//!
128
// Build without the standard library when the `std` feature is disabled.
#![cfg_attr(not(feature = "std"), no_std)]
// Allow coverage attributes on nightly instrumented-coverage builds.
#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
// Enable `#[doc(cfg(...))]` feature badges when building docs on docs.rs.
#![cfg_attr(docsrs, feature(doc_cfg))]

// In `alloc`-only (no_std) builds, pull in the `alloc` crate under an alias so
// it does not collide with the crate-local `alloc` re-export below.
#[cfg(all(feature = "alloc", not(feature = "std")))]
extern crate alloc as rust_alloc;

// The table requires a heap allocator: fail the build with a clear message if
// neither `std` nor `alloc` is enabled.
#[cfg(not(any(feature = "std", feature = "alloc")))]
compile_error!("expected either `std` or `alloc` feature to be enabled");

mod array; // fixed-size storage arrays (allocated once, never resized)
mod index; // Detached/Abstract/Concrete index types and their conversions
mod padded; // cache-line padding wrapper (see "Memory Layout" above)
mod params; // compile-time capacity/layout parameters (re-exported via `config`)
mod public; // public `PTab` API surface (re-exported below)
mod reclaim; // pluggable memory-reclamation traits (re-exported via `memory`)
mod table; // internal table machinery
mod utils; // std/no_std shims providing `alloc` and `sync`

// Crate-wide aliases so internal code uses one path regardless of whether the
// `std` or `alloc` implementation is selected.
pub(crate) use crate::utils::alloc;
pub(crate) use crate::utils::sync;
150
151pub mod implementation {
152  #![doc = "# Implementation Notes\n\nThis document describes the design and implementation of `ptab`, a lock-free concurrent table optimized for read-heavy workloads.\n\n## Background\n\nThe design is inspired by the Erlang/OTP process table implementation, documented in the BEAM source code. The problem it solves is mapping process identifiers to process structures in a highly concurrent environment where:\n\n- Lookups vastly outnumber insertions and deletions\n- Multiple threads perform lookups simultaneously\n- Lookups must not modify any shared state (no reference counting)\n- The data structure must scale with CPU count\n\nTraditional approaches using locks or lock-free structures with reference counting suffer from cache-line contention. Even a simple atomic increment of a reference counter requires the cache line to bounce between all CPUs performing lookups, creating a severe scalability bottleneck.\n\n## Design Goals\n\n1. **Zero-contention reads**: Lookups must not write to any shared memory. This is the single most important property for scalability.\n\n2. **Cache-line aware layout**: Adjacent slots reside in different cache lines to avoid false sharing when multiple threads insert concurrently.\n\n3. **Generational indices**: Reused slots produce different indices to mitigate ABA problems in concurrent algorithms.\n\n4. 
**Globally sequential allocation**: New indices are assigned in a consistent order across all threads, preserving the property that comparing two indices reveals their relative creation order.\n\n## Architecture Overview\n\n```text\n\u{250c}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2510}\n\u{2502}                            PTab<T, P>                           \u{2502}\n\u{251c}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2524}\n\u{2502}  \u{250c}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2510}    \u{250c}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2510} \u{2502}\n\u{2502}  \u{2502}   Volatile (mutable)  \u{2502}    \u{2502}     ReadOnly (immutable)      \u{2502} 
\u{2502}\n\u{2502}  \u{2502}   [cache-padded]      \u{2502}    \u{2502}     [cache-padded]            \u{2502} \u{2502}\n\u{2502}  \u{251c}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2524}    \u{251c}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2524} \u{2502}\n\u{2502}  \u{2502} entries: AtomicU32    \u{2502}    \u{2502} data: Array<AtomicOwned<T>>   \u{2502} \u{2502}\n\u{2502}  \u{2502} next_id: AtomicU32    \u{2502}    \u{2502} slot: Array<AtomicUsize>      \u{2502} \u{2502}\n\u{2502}  \u{2502} free_id: AtomicU32    \u{2502}    \u{2502}                               \u{2502} \u{2502}\n\u{2502}  \u{2514}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2518}    \u{2514}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2518} \u{2502}\n\u{2514}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2518}\n```\n\nThe table splits into 
two sections:\n\n- **Volatile**: Counters modified during operations. Grouped together and cache-padded to isolate from read-only data.\n\n- **ReadOnly**: The storage arrays. Despite the name, individual slots are modified atomically, but the arrays themselves are allocated once and never resized.\n\n## Index Structure\n\nIndices come in three forms encoding the same information differently:\n\n### `Detached` Index\n\nThe public-facing index type. Encodes both a slot identifier and a generational component in a single `usize`. The bit layout interleaves these components to enable efficient conversion to concrete indices.\n\n### `Abstract` Index\n\nA sequential counter representing allocation order. Abstract index 0 is the first allocation, 1 the second, and so on. When a slot is freed and reallocated, it receives a new abstract index (old index plus capacity), incrementing its generation.\n\n### `Concrete` Index\n\nThe actual offset into storage arrays. Multiple abstract indices map to the same concrete index (differing only in generation). The mapping from abstract to concrete distributes consecutive allocations across cache lines.\n\n### Index Conversions\n\n```text\nAbstract Index <------> Detached Index\n      |                       |\n      |                       |\n      v                       v\nConcrete Index <--------------+\n```\n\nConversions use bit manipulation with precomputed masks and shifts derived from `Params::LENGTH` at compile time.\n\n## Cache-Line Distribution\n\nA naive table places slots 0, 1, 2, ... in consecutive memory locations. 
When multiple threads allocate simultaneously, they write to adjacent slots, likely in the same cache line, causing contention.\n\nInstead, `ptab` distributes consecutive allocations across cache lines:\n\n```text\nCache Line 0:  slot 0,  slot 8,  slot 16, slot 24, ...\nCache Line 1:  slot 1,  slot 9,  slot 17, slot 25, ...\nCache Line 2:  slot 2,  slot 10, slot 18, slot 26, ...\n   ...\nCache Line 7:  slot 7,  slot 15, slot 23, slot 31, ...\n```\n\nThe abstract-to-concrete mapping implements this transformation:\n\n```rust,ignore\nconcrete = (abstract & MASK_BLOCK) << SHIFT_BLOCK\n         + (abstract >> SHIFT_INDEX) & MASK_INDEX\n```\n\nWhere `MASK_BLOCK` selects which cache line and `MASK_INDEX` selects position within that cache line. Constants derive from `CACHE_LINE_SLOTS`.\n\nThis ensures only true conflicts trigger communication between processors, avoiding false sharing.\n\n## References\n\n- [Erlang/OTP](https://github.com/erlang/otp)\n"include_str!("../IMPLEMENTATION.md")]
153}
154
155pub mod config {
156  //! Configuration parameters which can be used to override the default table
157  //! settings.
158
159  pub use crate::params::CACHE_LINE;
160  pub use crate::params::CACHE_LINE_SLOTS;
161  pub use crate::params::Capacity;
162  pub use crate::params::ConstParams;
163  pub use crate::params::DebugParams;
164  pub use crate::params::DefaultParams;
165  pub use crate::params::Params;
166  pub use crate::params::ParamsExt;
167}
168
169pub mod memory {
170  //! Memory reclamation traits and built-in strategies.
171  //!
172  //! This module exposes the internal reclamation API used by `ptab`. You do
173  //! not need to interact with these traits directly unless implementing
174  //! [`Params`] or your own custom collector.
175  //!
176  //! [`Params`]: crate::params::Params
177
178  pub use crate::reclaim::Atomic;
179  pub use crate::reclaim::Collector;
180  pub use crate::reclaim::CollectorWeak;
181  pub use crate::reclaim::Shared;
182  pub use crate::reclaim::collector;
183}
184
185#[doc(inline)]
186pub use self::config::Capacity;
187
188#[doc(inline)]
189pub use self::config::ConstParams;
190
191#[doc(inline)]
192pub use self::config::DefaultParams;
193
194#[doc(inline)]
195pub use self::config::Params;
196
197pub use self::index::Detached;
198
199pub use self::public::PTab;
200pub use self::public::WeakKeys;